[clang] Patch series to reapply #118734 and substantially improve it (PR #120534)

Chandler Carruth via cfe-commits cfe-commits at lists.llvm.org
Tue Dec 24 02:06:36 PST 2024


https://github.com/chandlerc updated https://github.com/llvm/llvm-project/pull/120534

>From 9d031c423a782c43f3fd716bd5db215436e36b95 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Sat, 21 Dec 2024 07:00:28 +0000
Subject: [PATCH 01/10] Mechanically port bulk of x86 builtins to TableGen

The goal is to make incremental (if small) progress towards fully
TableGen'ed builtins, and to unblock #120534 by gaining access to more
powerful TableGen-based representations.

The bulk `.td` file addition was generated with the help of a very rough
Python script. That script made no attempt to be robust or reusable; it
specifically handled only the cases in the X86 `.def` file.

Four entries from the `.def` file were not handled automatically, as they
used `BUILTIN` rather than `TARGET_BUILTIN`. Each of these was ported by
hand to a `TargetBuiltin` entry with an empty feature string, which seems
like a better match.

For all the automatically ported entries, the results were compared by
sorting and diffing the `.def` file and the generated `.inc` file. The
only differences were:

- Different horizontal whitespace

- Additional entries that had already been ported to the `.td` file.

- Systematically using `Oi` instead of `LLi` for the type `long long
  int`. The `.def` file uses a mixture of `Oi` and `LLi`. I chose the
  shorter encoding.

This gives me high confidence in the correctness of the change.
---
 clang/include/clang/Basic/BuiltinsBase.td     |   11 +-
 clang/include/clang/Basic/BuiltinsX86.def     | 2225 -------
 clang/include/clang/Basic/BuiltinsX86.td      | 5387 +++++++++++++++++
 clang/include/clang/Basic/TargetBuiltins.h    |    2 -
 clang/lib/Basic/Targets/X86.cpp               |    8 -
 clang/utils/TableGen/ClangBuiltinsEmitter.cpp |   24 +
 6 files changed, 5419 insertions(+), 2238 deletions(-)
 delete mode 100644 clang/include/clang/Basic/BuiltinsX86.def

diff --git a/clang/include/clang/Basic/BuiltinsBase.td b/clang/include/clang/Basic/BuiltinsBase.td
index cff182f3f282cb..afed3c815d3290 100644
--- a/clang/include/clang/Basic/BuiltinsBase.td
+++ b/clang/include/clang/Basic/BuiltinsBase.td
@@ -95,9 +95,6 @@ class CustomEntry {
 }
 
 class AtomicBuiltin : Builtin;
-class TargetBuiltin : Builtin {
-  string Features = "";
-}
 
 class LibBuiltin<string header, string languages = "ALL_LANGUAGES"> : Builtin {
   string Header = header;
@@ -122,6 +119,14 @@ class OCL_DSELangBuiltin : LangBuiltin<"OCL_DSE">;
 class OCL_GASLangBuiltin : LangBuiltin<"OCL_GAS">;
 class OCLLangBuiltin : LangBuiltin<"ALL_OCL_LANGUAGES">;
 
+class TargetBuiltin : Builtin {
+  string Features = "";
+}
+class TargetLibBuiltin : TargetBuiltin {
+  string Header;
+  string Languages = "ALL_LANGUAGES";
+}
+
 class Template<list<string> substitutions,
                list<string> affixes,
                bit as_prefix = 0> {
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
deleted file mode 100644
index 352b3a9ec594a7..00000000000000
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ /dev/null
@@ -1,2225 +0,0 @@
-//===--- BuiltinsX86.def - X86 Builtin function database --------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the X86-specific builtin function database.  Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-//===----------------------------------------------------------------------===//
-
-// The format of this database matches clang/Basic/Builtins.def.
-
-// FIXME: Ideally we would be able to pull this information from what
-// LLVM already knows about X86 builtins. We need to match the LLVM
-// definition anyway, since code generation will lower to the
-// intrinsic if one exists.
-
-#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
-#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-#if defined(BUILTIN) && !defined(TARGET_HEADER_BUILTIN)
-#  define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-// MMX
-//
-// All MMX instructions will be generated via builtins. Any MMX vector
-// types (<1 x i64>, <2 x i32>, etc.) that aren't used by these builtins will be
-// expanded by the back-end.
-// FIXME: _mm_prefetch must be a built-in because it takes a compile-time constant
-// argument and our prior approach of using a #define to the current built-in
-// doesn't work in the presence of re-declaration of _mm_prefetch for windows.
-TARGET_BUILTIN(_mm_prefetch, "vcC*i", "nc", "mmx")
-
-// SSE intrinsics.
-
-TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
-TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh",XMMINTRIN_H, ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "n", "sse")
-TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", XMMINTRIN_H, ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse")
-TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", XMMINTRIN_H, ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "ncV:128:", "sse")
-
-TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "nV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "n", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2OiV16cV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2OiV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "n", "sse2")
-TARGET_HEADER_BUILTIN(_mm_clflush, "vvC*", "nh", EMMINTRIN_H, ALL_LANGUAGES, "sse2")
-TARGET_BUILTIN(__builtin_ia32_lfence, "v", "n", "sse2")
-TARGET_HEADER_BUILTIN(_mm_lfence, "v", "nh", EMMINTRIN_H, ALL_LANGUAGES, "sse2")
-TARGET_BUILTIN(__builtin_ia32_mfence, "v", "n", "sse2")
-TARGET_HEADER_BUILTIN(_mm_mfence, "v", "nh", EMMINTRIN_H, ALL_LANGUAGES, "sse2")
-TARGET_BUILTIN(__builtin_ia32_pause, "v", "n", "")
-TARGET_HEADER_BUILTIN(_mm_pause, "v", "nh", EMMINTRIN_H, ALL_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2OiV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2OiV2OiV2Oi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllq128, "V2OiV2OiV2Oi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2OiV2Oii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2OiV2Oii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2OiV2OiIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2OiV2OiIi", "ncV:128:", "sse2")
-
-TARGET_BUILTIN(__builtin_ia32_monitor, "vvC*UiUi", "n", "sse3")
-TARGET_BUILTIN(__builtin_ia32_mwait, "vUiUi", "n", "sse3")
-TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "nV:128:", "sse3")
-
-TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "ncV:128:", "ssse3")
-
-TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1")
-
-TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "ncV:128:", "sse4.1")
-
-// SSE 4.2
-TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-
-TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-
-TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32")
-TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32")
-TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32")
-
-// SSE4a
-TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_extrq, "V2OiV2OiV16c", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertqi, "V2OiV2OiV2OiIcIc", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertq, "V2OiV2OiV2Oi", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "nV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "nV:128:", "sse4a")
-
-// AES
-TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2OiV2Oi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2OiV2OiIc", "ncV:128:", "aes")
-
-// VAES
-TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
-
-// GFNI
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,evex512,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,evex512,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "ncV:128:", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512f,evex512,gfni")
-
-// CLMUL
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2OiV2OiV2OiIc", "ncV:128:", "pclmul")
-
-// VPCLMULQDQ
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4OiV4OiV4OiIc", "ncV:256:", "vpclmulqdq")
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8OiV8OiV8OiIc", "ncV:512:", "avx512f,evex512,vpclmulqdq")
-
-// AVX
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "ncV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4OiV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4OiV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4OiV4Oi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2Oi", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4Oi", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2OiV2d", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4OiV4d", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
-
-// AVX2
-TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4OiV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4OiV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshufd256, "V8iV8iIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshuflw256, "V16sV16sIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshufhw256, "V16sV16sIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4OiV4OiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4OiV4Oii", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllq256, "V4OiV4OiV2Oi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4OiV4OiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4OiV4Oii", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4OiV4OiV2Oi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permdf256, "V4dV4dIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permti256, "V4OiV4OiV4OiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permdi256, "V4OiV4OiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2OiV4OiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4OiV4OiV2OiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4OiV4OiC*V4Oi", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2OiV2OiC*V2Oi", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4Oi*V4OiV4Oi", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2Oi*V2OiV2Oi", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "ncV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4OiV4OiV4Oi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2OiV2OiV2Oi", "ncV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "ncV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "ncV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4OiV4OiV4Oi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2OiV2OiV2Oi", "ncV:128:", "avx2")
-
-// GATHER
-TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2OiV2dIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4OiV4dIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2OiV4fIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4OiV4fIc", "nV:256:", "avx2")
-
-TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2OiV2OiOiC*V4iV2OiIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4OiV4OiOiC*V4iV4OiIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2OiV2OiOiC*V2OiV2OiIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4OiV4OiOiC*V4OiV4OiIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2OiV4iIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4OiV4iIc", "nV:256:", "avx2")
-
-// F16C
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "ncV:128:", "f16c")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "ncV:256:", "f16c")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "ncV:128:", "f16c")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "ncV:256:", "f16c")
-
-// RDRAND
-TARGET_BUILTIN(__builtin_ia32_rdrand16_step, "UiUs*", "n", "rdrnd")
-TARGET_BUILTIN(__builtin_ia32_rdrand32_step, "UiUi*", "n", "rdrnd")
-
-// FXSR
-TARGET_BUILTIN(__builtin_ia32_fxrstor, "vv*", "n", "fxsr")
-TARGET_BUILTIN(__builtin_ia32_fxsave, "vv*", "n", "fxsr")
-
-// XSAVE
-TARGET_BUILTIN(__builtin_ia32_xsave, "vv*UOi", "n", "xsave")
-TARGET_BUILTIN(__builtin_ia32_xrstor, "vv*UOi", "n", "xsave")
-TARGET_BUILTIN(__builtin_ia32_xgetbv, "UOiUi", "n", "xsave")
-TARGET_HEADER_BUILTIN(_xgetbv, "UWiUi", "nh", IMMINTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_xsetbv, "vUiUOi", "n", "xsave")
-TARGET_HEADER_BUILTIN(_xsetbv, "vUiUWi", "nh", IMMINTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_xsaveopt, "vv*UOi", "n", "xsaveopt")
-TARGET_BUILTIN(__builtin_ia32_xrstors, "vv*UOi", "n", "xsaves")
-TARGET_BUILTIN(__builtin_ia32_xsavec, "vv*UOi", "n", "xsavec")
-TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*UOi", "n", "xsaves")
-
-// SHSTK
-TARGET_BUILTIN(__builtin_ia32_incsspd, "vUi", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_rdsspd, "UiUi", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_saveprevssp, "v", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_rstorssp, "vv*", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_wrssd, "vUiv*", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_wrussd, "vUiv*", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_setssbsy, "v", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_clrssbsy, "vv*", "n", "shstk")
-
-//CLFLUSHOPT
-TARGET_BUILTIN(__builtin_ia32_clflushopt, "vvC*", "n", "clflushopt")
-
-//CLWB
-TARGET_BUILTIN(__builtin_ia32_clwb, "vvC*", "n", "clwb")
-
-//WB[NO]INVD
-TARGET_BUILTIN(__builtin_ia32_wbinvd, "v", "n", "")
-TARGET_BUILTIN(__builtin_ia32_wbnoinvd, "v", "n", "wbnoinvd")
-
-// ADX
-TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "nE", "")
-TARGET_BUILTIN(__builtin_ia32_subborrow_u32, "UcUcUiUiUi*", "nE", "")
-
-// RDSEED
-TARGET_BUILTIN(__builtin_ia32_rdseed16_step, "UiUs*", "n", "rdseed")
-TARGET_BUILTIN(__builtin_ia32_rdseed32_step, "UiUi*", "n", "rdseed")
-
-// LZCNT
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "ncE", "lzcnt")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "ncE", "lzcnt")
-
-// BMI
-TARGET_BUILTIN(__builtin_ia32_bextr_u32, "UiUiUi", "ncE", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "ncE", "")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")
-
-// BMI2
-TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "ncE", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pdep_si, "UiUiUi", "ncE", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "ncE", "bmi2")
-
-// TBM
-TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "ncE", "tbm")
-
-// LWP
-TARGET_BUILTIN(__builtin_ia32_llwpcb, "vv*", "n", "lwp")
-TARGET_BUILTIN(__builtin_ia32_slwpcb, "v*", "n", "lwp")
-TARGET_BUILTIN(__builtin_ia32_lwpins32, "UcUiUiIUi", "n", "lwp")
-TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiIUi", "n", "lwp")
-
-// SHA
-TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "ncV:128:", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "ncV:128:", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha1msg1, "V4iV4iV4i", "ncV:128:", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha1msg2, "V4iV4iV4i", "ncV:128:", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha256rnds2, "V4iV4iV4iV4i", "ncV:128:", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "ncV:128:", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "ncV:128:", "sha")
-
-// FMA
-TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "ncV:128:", "fma")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "ncV:128:", "fma")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "ncV:128:", "fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "ncV:128:", "fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4")
-
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-
-// XOP
-TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsww, "V8sV8sV8sV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
-
-TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2OiV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2OiV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2OiV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2OiV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2OiV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2OiV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2OiV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotq, "V2OiV2OiV2Oi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2OiV2OiIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2OiV2OiV2Oi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2OiV2OiV2Oi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2OiV2OiV2OiIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2OiV2OiV2OiIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2OiIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4OiIc", "ncV:256:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "ncV:256:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczsd, "V2dV2d", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczps, "V4fV4f", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczpd, "V2dV2d", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczps256, "V8fV8f", "ncV:256:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczpd256, "V4dV4d", "ncV:256:", "xop")
-
-TARGET_BUILTIN(__builtin_ia32_xbegin, "i", "n", "rtm")
-TARGET_BUILTIN(__builtin_ia32_xend, "v", "n", "rtm")
-TARGET_BUILTIN(__builtin_ia32_xabort, "vIc", "n", "rtm")
-TARGET_BUILTIN(__builtin_ia32_xtest, "i", "n", "rtm")
-
-BUILTIN(__builtin_ia32_rdpmc, "UOii", "")
-BUILTIN(__builtin_ia32_rdtsc, "UOi", "")
-BUILTIN(__rdtsc, "UOi", "")
-BUILTIN(__builtin_ia32_rdtscp, "UOiUi*", "")
-
-TARGET_BUILTIN(__builtin_ia32_rdpid, "Ui", "n", "rdpid")
-TARGET_BUILTIN(__builtin_ia32_rdpru, "ULLii", "n", "rdpru")
-
-// PKU
-TARGET_BUILTIN(__builtin_ia32_rdpkru, "Ui", "n", "pku")
-TARGET_BUILTIN(__builtin_ia32_wrpkru, "vUi", "n", "pku")
-
-// AVX-512
-TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_cmpps512_mask,   "UsV16fV16fIiUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cmpps256_mask,   "UcV8fV8fIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpps128_mask,   "UcV4fV4fIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "ncV:128:", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_minps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_minpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_maxps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_maxpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8OiOiC*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vOi*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_alignq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f,evex512")
-
-// AVX-VNNI and AVX512-VNNI
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
-
-// AVX-VNNI-INT8
-TARGET_BUILTIN(__builtin_ia32_vpdpbssd128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbssd256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbssds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbssds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbsud128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbsud256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbsuds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbsuds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbuud128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbuud256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbuuds128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpbuuds256, "V8iV8iV8iV8i", "ncV:256:", "avxvnniint8|avx10.2-256")
-
-// MOVRS
-TARGET_BUILTIN(__builtin_ia32_prefetchrs, "vvC*", "nc", "movrs")
-
-TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2dvC*V2OiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2OiV2OivC*V2OiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4dvC*V4OiUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4OiV4OivC*V4OiUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4fvC*V2OiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4ivC*V2OiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4fvC*V4OiUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4ivC*V4OiUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2dvC*V4iUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2OiV2OivC*V4iUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4dvC*V4iUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4OiV4OivC*V4iUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4fvC*V4iUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4ivC*V4iUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8fvC*V8iUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8ivC*V8iUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dvC*V8iUcIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fvC*V16iUsIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dvC*V8OiUcIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8fvC*V8OiUcIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8OiV8OivC*V8iUcIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16ivC*V16iUsIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8OiV8OivC*V8OiUcIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8ivC*V8OiUcIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vv*UcV8iV8dIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vv*UsV16iV16fIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8df,  "vv*UcV8OiV8dIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vv*UcV8OiV8fIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8di,  "vv*UcV8iV8OiIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2OiV2OiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4OiV4OiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2OiV2OiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4OiV4OiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128, "V2OiV2Oi", "ncV:128:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256, "V4OiV4Oi", "ncV:256:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512, "V8OiV8Oi", "ncV:512:", "avx512cd,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512, "V16iV16i", "ncV:512:", "avx512cd,evex512")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd,evex512")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8OiV8Oi", "ncV:512:", "avx512cd,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "UOiV64cV64cUOi", "ncV:512:", "avx512bitalg,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "ncV:512:", "avx512bw,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2OiV2OiV2OiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4OiV4OiV4OiUc", "ncV:256:", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2OiV2OiV2OiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4OiV4OiV4OiUc", "ncV:256:", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2OiC*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4OiV4OiC*V4OiUc", "nV:256:", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "nV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "nV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "nV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "nV:256:", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8fC*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4iC*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloadsi256_mask, "V8iV8iC*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscaleps_256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefpd128_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vv*UcV2OiV2dIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vv*UcV2OiV2OiIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vv*UcV4OiV4dIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vv*UcV4OiV4OiIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vv*UcV2OiV4fIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vv*UcV2OiV4iIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vv*UcV4OiV4fIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vv*UcV4OiV4iIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vv*UcV4iV2dIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vv*UcV4iV2OiIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vv*UcV4iV4dIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vv*UcV4iV4OiIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vv*UcV4iV4fIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vv*UcV4iV4iIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vv*UcV8iV8fIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vv*UcV8iV8iIi", "nV:256:", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2OiV2d", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4OiV4d", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8OiV8d", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "ncV:512:", "avx512vbmi,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vpshldvd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2OiV4fUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2OiV4fUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_pmovswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prolq512, "V8OiV8OiIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prold128, "V4iV4iIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prold256, "V8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolq128, "V2OiV2OiIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolq256, "V4OiV4OiIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prorq512, "V8OiV8OiIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prolvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prord128, "V4iV4iIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prord256, "V8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorq128, "V2OiV2OiIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorq256, "V4OiV4OiIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_prorvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8OiV8Oii", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8OiV8Oii", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psravq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psravq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4iC*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8iC*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2OiV2OiC*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4OiV4OiC*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512ifma,avx512vl|avxifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512ifma,avx512vl|avxifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
-TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2OiIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2OiIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8sC*V8sUc", "nV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16sC*V16sUs", "nV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16cC*V16cUs", "nV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32cC*V32cUi", "nV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2OiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2OiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4OiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4OiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8fC*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2OiV2OiC*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4OiV4OiC*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4iC*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8iC*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8fC*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "nV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "nV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "nV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2OiV2Oi", "ncV:128:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4OiV4Oi", "ncV:256:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8OiV8Oii", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psraq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraq256, "V4OiV4OiV2Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2OiV2Oii", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4OiV4Oii", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psraq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2OiV2OiV2OiV2OiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2OiV2OiV2OiV2OiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4OiV4OiV4OiV4OiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4OiV4OiV4OiV4OiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "UOiV64c", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cUOi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8OiUc", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8Oi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b256, "V32cUi", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2w128, "V8sUc", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2w256, "V16sUs", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2OiUc", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4OiUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2Oi", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4Oi", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2OiV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4OiV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2OiV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4OiV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2OiV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4OiV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2OiV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4OiV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2OiV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4OiV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2OiV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2OiV8OiIiV2OiUc", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4OiV8OiIiV4OiUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2OiV4OiIiV2OiUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8OiV8OiV2OiIi", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8OiV8OiV4OiIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4OiV4OiV2OiIi", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask,  "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_permdi512, "V8OiV8OiIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "ncV:512:", "avx512vbmi,evex512")
-TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq,evex512")
-TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8OiV64cV64c", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "nV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512vbmi2,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
-
-// bf16 intrinsics
-TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8yV4fV4f", "ncV:128:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16yV8fV8f", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32yV16fV16f", "ncV:512:", "avx512bf16,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8yV4fV8yUc", "ncV:128:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256_mask, "V8yV8fV8yUc", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512_mask, "V16yV16fV16yUs", "ncV:512:", "avx512bf16,evex512")
-TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV8yV8y", "ncV:128:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV16yV16y", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV32yV32y", "ncV:512:", "avx512bf16,evex512")
-TARGET_BUILTIN(__builtin_ia32_cvtsbf162ss_32, "fy", "nc", "avx512bf16")
-
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_512, "vV8OiV8OiUc*Uc*", "nV:512:", "avx512vp2intersect,evex512")
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_256, "vV4OiV4OiUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_128, "vV2OiV2OiUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_512, "vV16iV16iUs*Us*", "nV:512:", "avx512vp2intersect,evex512")
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
-
-// AVX512 fp16 intrinsics
-TARGET_BUILTIN(__builtin_ia32_vcomish,       "iV8xV8xIiIi",    "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_addph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_subph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_mulph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_divph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_maxph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_minph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_minph256,      "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_minph128,      "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_maxph256,      "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_maxph128,      "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_addsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_divsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_mulsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_subsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_maxsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_minsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_cmpph512_mask, "UiV32xV32xIiUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8xC*V8xUc", "nV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")
-
-TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcpph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrtph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrtph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrtph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_getmantph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_getexpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexpph512_mask, "V32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_scalefph128_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefph256_mask, "V16xV16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_rndscaleph_128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscaleph_256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscaleph_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduceph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduceph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduceph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_rcpsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_rsqrtsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_getmantsh_round_mask, "V8xV8xV8xIiV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_getexpsh128_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_scalefsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_rndscalesh_round_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_reducesh_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16")
-
-TARGET_BUILTIN(__builtin_ia32_sqrtph, "V8xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_sqrtph256, "V16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_sqrtph512, "V32xV32xIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_sqrtsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_fpclassph128_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassph256_mask, "UsV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassph512_mask, "UiV32xIiUi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_fpclasssh_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16")
-
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph128_mask, "V8xV2dV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_mask, "V8xV4dV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtw2ph128_mask, "V8xV8sV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_mask, "V16xV16sV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph128_mask, "V8xV8UsV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_mask, "V16xV16UsV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph128_mask, "V8xV4iV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_mask, "V8xV8iV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph128_mask, "V8xV4UiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_mask, "V8xV8UiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph128_mask, "V8xV2OiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_mask, "V8xV4OiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph128_mask, "V8xV2UOiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_mask, "V8xV4UOiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvtusi2sh, "V8xV8xUiIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvtsi2sh, "V8xV8xiIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvttsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
-
-TARGET_BUILTIN(__builtin_ia32_vcvtph2psx128_mask, "V4fV8xV4fUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_mask, "V8fV8xV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vfmaddph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask,  "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_maskz, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_mask3, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsh3_mask3, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph128_maskz,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_maskz,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_maskz,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask3,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_maskz,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_maskz,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_maskz,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask3,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_mask,   "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_maskz,   "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_maskz,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask3,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask3,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-
-TARGET_BUILTIN(__builtin_ia32_vfmulcsh_mask,   "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfcmulcsh_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmulcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmulcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmulcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_vfcmulcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmulcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmulcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
-
-// generic select intrinsics
-TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cUOiV64cV64c", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectph_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectph_256, "V16xUsV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectph_512, "V32xUiV32xV32x", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectpbf_128, "V8yUcV8yV8y", "ncV:128:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpbf_256, "V16yUsV16yV16y", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpbf_512, "V32yUiV32yV32y", "ncV:512:", "avx512bf16,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2OiUcV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4OiUcV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8OiUcV8OiV8Oi", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_selectsh_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_selectsbf_128, "V8yUcV8yV8y", "ncV:128:", "avx512bf16")
-TARGET_BUILTIN(__builtin_ia32_selectss_128, "V4fUcV4fV4f", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_selectsd_128, "V2dUcV2dV2d", "ncV:128:", "avx512f")
-
-// generic reduction intrinsics
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph512, "xV32x", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph512, "xV32x", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph512, "xxV32x", "ncV:512:", "avx512fp16,evex512")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
-
-// MONITORX/MWAITX
-TARGET_BUILTIN(__builtin_ia32_monitorx, "vvC*UiUi", "n", "mwaitx")
-TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "n", "mwaitx")
-
-// WAITPKG
-TARGET_BUILTIN(__builtin_ia32_umonitor, "vvC*", "n", "waitpkg")
-TARGET_BUILTIN(__builtin_ia32_umwait, "UcUiUiUi", "n", "waitpkg")
-TARGET_BUILTIN(__builtin_ia32_tpause, "UcUiUiUi", "n", "waitpkg")
-
-// CLZERO
-TARGET_BUILTIN(__builtin_ia32_clzero, "vv*", "n", "clzero")
-
-// CLDEMOTE
-TARGET_BUILTIN(__builtin_ia32_cldemote, "vvC*", "n", "cldemote")
-
-// Direct Move
-TARGET_BUILTIN(__builtin_ia32_directstore_u32, "vUi*Ui", "n", "movdiri")
-TARGET_BUILTIN(__builtin_ia32_movdir64b, "vv*vC*", "n", "movdir64b")
-
-// PTWRITE
-TARGET_BUILTIN(__builtin_ia32_ptwrite32, "vUi", "n", "ptwrite")
-
-// INVPCID
-TARGET_BUILTIN(__builtin_ia32_invpcid, "vUiv*", "nc", "invpcid")
-
-// ENQCMD
-TARGET_BUILTIN(__builtin_ia32_enqcmd, "Ucv*vC*", "n", "enqcmd")
-TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd")
-
-// KEY LOCKER
-TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, "UiUiV2Oiv*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, "UiUiV2OiV2Oiv*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesenc128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesenc256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesdec128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesdec256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesencwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
-TARGET_BUILTIN(__builtin_ia32_aesencwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
-TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
-TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
-
-// SERIALIZE
-TARGET_BUILTIN(__builtin_ia32_serialize, "v", "n", "serialize")
-
-// TSXLDTRK
-TARGET_BUILTIN(__builtin_ia32_xsusldtrk, "v", "n", "tsxldtrk")
-TARGET_BUILTIN(__builtin_ia32_xresldtrk, "v", "n", "tsxldtrk")
-
-// RAO-INT
-TARGET_BUILTIN(__builtin_ia32_aadd32, "vv*Si", "n", "raoint")
-TARGET_BUILTIN(__builtin_ia32_aand32, "vv*Si", "n", "raoint")
-TARGET_BUILTIN(__builtin_ia32_aor32, "vv*Si", "n", "raoint")
-TARGET_BUILTIN(__builtin_ia32_axor32, "vv*Si", "n", "raoint")
-
-// MSVC
-TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-TARGET_HEADER_BUILTIN(_ReadWriteBarrier, "v", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_ReadBarrier,      "v", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_WriteBarrier,     "v", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-TARGET_HEADER_BUILTIN(__cpuid,   "vi*i",  "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__cpuidex, "vi*ii", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-TARGET_HEADER_BUILTIN(__emul,  "LLiii",    "nch", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__emulu, "ULLiUiUi", "nch", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__int2c, "v",       "nhr", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__ud2,   "v",       "nhr", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-TARGET_HEADER_BUILTIN(__readfsbyte,  "UcUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readfsword,  "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readfsdword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readfsqword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-TARGET_HEADER_BUILTIN(__readgsbyte,  "UcUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readgsword,  "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-// AVX10.2 VNNI FP16
-TARGET_BUILTIN(__builtin_ia32_vdpphps128, "V4fV4fV8xV8x", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vdpphps256, "V8fV8fV16xV16x", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vdpphps512, "V16fV16fV32xV32x", "ncV:512:", "avx10.2-512")
-
-// AVX10.2 VNNI INT8
-TARGET_BUILTIN(__builtin_ia32_vpdpbssd512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpbssds512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpbsud512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpbsuds512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpbuud512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpbuuds512, "V16iV16iV16iV16i", "ncV:512:", "avx10.2-512")
-
-// AVX10.2 VNNI INT16
-TARGET_BUILTIN(__builtin_ia32_vpdpwsud512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpwsuds512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpwusd512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpwusds512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpwuud512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vpdpwuuds512, "V16iV16iV16iV16i", "nV:512:", "avx10.2-512")
-
-// AVX10.2 VMPSADBW
-TARGET_BUILTIN(__builtin_ia32_mpsadbw512, "V32sV64cV64cIc", "ncV:512:", "avx10.2-512")
-
-// AVX10.2 YMM Rounding
-TARGET_BUILTIN(__builtin_ia32_vaddpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vaddph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vaddps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcmppd256_round_mask, "UcV4dV4dIiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcmpph256_round_mask, "UsV16xV16xIiUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcmpps256_round_mask, "UcV8fV8fIiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_round_mask, "V8xV8iV8xUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtdq2ps256_round_mask, "V8fV8iV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2dq256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_round_mask, "V8xV4dV8xUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2ps256_round_mask, "V4fV4dV4fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2qq256_round_mask, "V4LLiV4dV4LLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2udq256_round_mask, "V4UiV4dV4UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2uqq256_round_mask, "V4ULLiV4dV4ULLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_round_mask, "V8iV8xV8iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_round_mask, "V4dV8xV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_round_mask, "V8fV8xV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_round_mask, "V4LLiV8xV4LLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_round_mask, "V8UiV8xV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_round_mask, "V4ULLiV8xV4ULLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_round_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_round_mask, "V16sV16xV16sUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2dq256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2pd256_round_mask, "V4dV4fV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_round_mask, "V8xV8fV8xUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2qq256_round_mask, "V4LLiV4fV4LLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2udq256_round_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2uqq256_round_mask, "V4ULLiV4fV4ULLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtqq2pd256_round_mask, "V4dV4LLiV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_round_mask, "V8xV4LLiV8xUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtqq2ps256_round_mask, "V4fV4LLiV4fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2dq256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2qq256_round_mask, "V4LLiV4dV4LLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2udq256_round_mask, "V4UiV4dV4UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqq256_round_mask, "V4ULLiV4dV4ULLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_round_mask, "V8iV8xV8iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_round_mask, "V4LLiV8xV4LLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_round_mask, "V8UiV8xV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_round_mask, "V4ULLiV8xV4ULLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_round_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_round_mask, "V16sV16xV16sUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2dq256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2qq256_round_mask, "V4LLiV4fV4LLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2udq256_round_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2uqq256_round_mask, "V4ULLiV4fV4ULLiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_round_mask, "V8xV8UiV8xUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtudq2ps256_round_mask, "V8fV8UiV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtuqq2pd256_round_mask, "V4dV4ULLiV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_round_mask, "V8xV4ULLiV8xUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ps256_round_mask, "V4fV4ULLiV4fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_round_mask, "V16xV16UsV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_round_mask, "V16xV16sV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vdivpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vdivph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vdivps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfcmulcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfixupimmpd256_round_mask, "V4dV4dV4dV4LLiIiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfixupimmpd256_round_maskz, "V4dV4dV4dV4LLiIiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfixupimmps256_round_mask, "V8fV8fV8fV8iIiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfixupimmps256_round_maskz, "V8fV8fV8fV8iIiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_maskz, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_maskz, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_maskz, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_maskz, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmsubph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmulcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetexppd256_round_mask, "V4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetexpph256_round_mask, "V16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetexpps256_round_mask, "V8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetmantpd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetmantph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetmantps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmaxpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmaxph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmaxps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmulpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmulph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmulps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrangepd256_round_mask, "V4dV4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrangeps256_round_mask, "V8fV8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vreducepd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vreduceph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vreduceps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrndscalepd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrndscaleph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrndscaleps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vscalefpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vscalefph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vscalefps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsqrtpd256_round, "V4dV4dIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsqrtph256_round, "V16xV16xIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsqrtps256_round, "V8fV8fIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsubpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsubph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsubps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
-
-// AVX-VNNI-INT16
-TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwsud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwsuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwsuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwusd128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwusd256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwusds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwusds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwuud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
-
-// AVX10.2 SATCVT-DS
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis32, "iV2dIi", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis32, "UiV2dIi", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2sis32, "iV4fIi", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2usis32, "UiV4fIi", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs128_mask,  "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
-
-// AVX-NE-CONVERT
-TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps128, "V4fxC*", "nV:128:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps256, "V8fxC*", "nV:256:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps128, "V4fV8yC*", "nV:128:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps256, "V8fV16yC*", "nV:256:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps128, "V4fV8yC*", "nV:128:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps256, "V8fV16yC*", "nV:256:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16128, "V8yV4f", "nV:128:", "avx512bf16,avx512vl|avxneconvert")
-TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16256, "V8yV8f", "nV:256:", "avx512bf16,avx512vl|avxneconvert")
-
-// SHA512
-TARGET_BUILTIN(__builtin_ia32_vsha512msg1, "V4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
-TARGET_BUILTIN(__builtin_ia32_vsha512msg2, "V4ULLiV4ULLiV4ULLi", "nV:256:", "sha512")
-TARGET_BUILTIN(__builtin_ia32_vsha512rnds2, "V4ULLiV4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
-
-TARGET_HEADER_BUILTIN(_InterlockedAnd64,         "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_InterlockedDecrement64,   "WiWiD*",   "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_InterlockedExchange64,    "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd64, "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_InterlockedExchangeSub64, "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_InterlockedIncrement64,   "WiWiD*",   "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_InterlockedOr64,          "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-TARGET_HEADER_BUILTIN(_InterlockedXor64,         "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
-// SM3
-TARGET_BUILTIN(__builtin_ia32_vsm3msg1, "V4UiV4UiV4UiV4Ui", "nV:128:", "sm3")
-TARGET_BUILTIN(__builtin_ia32_vsm3msg2, "V4UiV4UiV4UiV4Ui", "nV:128:", "sm3")
-TARGET_BUILTIN(__builtin_ia32_vsm3rnds2, "V4UiV4UiV4UiV4UiIUi", "nV:128:", "sm3")
-
-// SM4
-TARGET_BUILTIN(__builtin_ia32_vsm4key4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
-TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
-TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
-TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
-
-// SM4_EVEX
-TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
-TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
-
-// AVX10 MINMAX
-TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16512, "V32yV32yV32yIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vminmaxpd128_mask, "V2dV2dV2dIiV2dUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxpd256_round_mask, "V4dV4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxpd512_round_mask, "V8dV8dV8dIiV8dUcIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vminmaxph128_mask, "V8xV8xV8xIiV8xUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxph256_round_mask, "V16xV16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxph512_round_mask, "V32xV32xV32xIiV32xUiIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vminmaxps128_mask, "V4fV4fV4fIiV4fUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxps256_round_mask, "V8fV8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256")
-
-// AVX10.2 SATCVT
-TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
-
-// AVX10.2 CONVERT
-TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx128_mask, "V8xV4fV4fV8xUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx256_mask, "V16xV8fV8fV16xUsIi", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx512_mask, "V32xV16fV16fV32xUiIi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvthf8_2ph128_mask, "V8xV16cV8xUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvthf8_2ph256_mask, "V16xV16cV16xUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvthf8_2ph512_mask, "V32xV32cV32xUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
-
-// AVX10.2 BF16
-TARGET_BUILTIN(__builtin_ia32_loadsbf16128_mask, "V8yV8yC*V8yUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_storesbf16128_mask, "vV8y*V8yUc", "nV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vaddnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vaddnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vaddnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vdivnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vdivnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vdivnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vmaxpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmaxpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmaxpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vminpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vminpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vmulnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmulnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vmulnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vsubnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsubnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsubnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcomsbf16eq, "iV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcomsbf16lt, "iV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcomsbf16neq, "iV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcomsbf16ge, "iV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcomsbf16gt, "iV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcomsbf16le, "iV8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcmppbf16512_mask,"UiV32yV32yIiUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcmppbf16256_mask,"UsV16yV16yIiUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcmppbf16128_mask,"UcV8yV8yIiUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16128_mask, "UcV8yIiUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16256_mask, "UsV16yIiUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16512_mask, "UiV32yIiUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vscalefpbf16128_mask, "V8yV8yV8yV8yUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vscalefpbf16256_mask, "V16yV16yV16yV16yUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vscalefpbf16512_mask, "V32yV32yV32yV32yUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vrcppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrcppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrcppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vgetexppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetexppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetexppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vreducenepbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vreducenepbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vreducenepbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16, "V8yV8y", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16256, "V16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16512, "V32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
-
-#undef BUILTIN
-#undef TARGET_BUILTIN
-#undef TARGET_HEADER_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cf8d2771310e3c..be2802f3908ff3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -17,6 +17,21 @@ class X86Builtin<string prototype> : TargetBuiltin {
   let Prototype = prototype;
 }
 
+class X86NoPrefixBuiltin<string prototype> : TargetBuiltin {
+  let Spellings = [NAME];
+  let Prototype = prototype;
+}
+
+class X86LibBuiltin<string prototype> : TargetLibBuiltin {
+  let Spellings = [NAME];
+  let Prototype = prototype;
+}
+
+def rdpmc : X86Builtin<"unsigned long long int(int)">;
+def rdtsc : X86Builtin<"unsigned long long int()">;
+def __rdtsc : X86NoPrefixBuiltin<"unsigned long long int()">;
+def rdtscp : X86Builtin<"unsigned long long int(unsigned int*)">;
+
 // Undefined Values
 def undef128 : X86Builtin<"_Vector<2, double>()"> {
   let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
@@ -135,3 +150,5375 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in
     def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
   }
 }
+
+
+// Mechanically ported builtins from the original `.def` file.
+//
+// TODO: Build structured ways of synthesizing relevant groups and improve the
+// organization of the builtins below this point (and move them above it). The
+// current formulation is based on what was easiest to recognize from the
+// pre-TableGen version.
+
+let Features = "mmx", Attributes = [NoThrow, Const] in {
+  def _mm_prefetch : X86NoPrefixBuiltin<"void(char const *, int)">;
+}
+
+let Features = "sse", Attributes = [NoThrow] in {
+  def ldmxcsr : X86Builtin<"void(unsigned int)">;
+}
+
+let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
+  def _mm_setcsr : X86LibBuiltin<"void(unsigned int)">;
+}
+
+let Features = "sse", Attributes = [NoThrow] in {
+  def stmxcsr : X86Builtin<"unsigned int()">;
+}
+
+let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
+  def _mm_getcsr : X86LibBuiltin<"unsigned int()">;
+}
+
+let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtss2si : X86Builtin<"int(_Vector<4, float>)">;
+  def cvttss2si : X86Builtin<"int(_Vector<4, float>)">;
+}
+
+let Features = "sse", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def movmskps : X86Builtin<"int(_Vector<4, float>)">;
+}
+
+let Features = "sse", Attributes = [NoThrow] in {
+  def sfence : X86Builtin<"void()">;
+}
+
+let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
+  def _mm_sfence : X86LibBuiltin<"void()">;
+}
+
+let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rcpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
+  def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow] in {
+  def movnti : X86Builtin<"void(int *, int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
+  def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
+  def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
+  def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
+  def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
+  def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
+  def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
+  def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
+  def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
+  def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
+  def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">;
+  def cvttsd2si : X86Builtin<"int(_Vector<2, double>)">;
+  def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
+  def cvtps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
+  def cvttps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow] in {
+  def clflush : X86Builtin<"void(void const *)">;
+}
+
+let Features = "sse2", Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
+  def _mm_clflush : X86LibBuiltin<"void(void const *)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow] in {
+  def lfence : X86Builtin<"void()">;
+}
+
+let Features = "sse2", Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
+  def _mm_lfence : X86LibBuiltin<"void()">;
+}
+
+let Features = "sse2", Attributes = [NoThrow] in {
+  def mfence : X86Builtin<"void()">;
+}
+
+let Features = "sse2", Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
+  def _mm_mfence : X86LibBuiltin<"void()">;
+}
+
+let Attributes = [NoThrow] in {
+  def pause : X86Builtin<"void()">;
+}
+
+let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
+  def _mm_pause : X86LibBuiltin<"void()">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+  def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+  def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+  def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+  def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
+  def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+  def psllqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
+  def psrlwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
+  def psrldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+  def psrlqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
+  def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
+  def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+  def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">;
+  def pslldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
+  def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "sse3", Attributes = [NoThrow] in {
+  def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
+  def mwait : X86Builtin<"void(unsigned int, unsigned int)">;
+}
+
+let Features = "sse3", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def lddqu : X86Builtin<"_Vector<16, char>(char const *)">;
+}
+
+let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">;
+}
+
+let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
+  def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
+  def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
+  def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
+  def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
+  def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
+  def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
+  def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
+  def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+  def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
+  def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
+  def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
+  def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
+  def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
+  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
+  def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
+  def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
+  def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
+  def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
+}
+
+let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def pcmpestrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, int, _Vector<16, char>, int, _Constant char)">;
+  def pcmpestri128 : X86Builtin<"int(_Vector<16, char>, int, _Vector<16, char>, int, _Constant char)">;
+  def pcmpistria128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def pcmpistric128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def pcmpistrio128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def pcmpistris128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def pcmpistriz128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def pcmpestria128 : X86Builtin<"int(_Vector<16, char>, int, _Vector<16, char>, int, _Constant char)">;
+  def pcmpestric128 : X86Builtin<"int(_Vector<16, char>, int, _Vector<16, char>, int, _Constant char)">;
+  def pcmpestrio128 : X86Builtin<"int(_Vector<16, char>, int, _Vector<16, char>, int, _Constant char)">;
+  def pcmpestris128 : X86Builtin<"int(_Vector<16, char>, int, _Vector<16, char>, int, _Constant char)">;
+  def pcmpestriz128 : X86Builtin<"int(_Vector<16, char>, int, _Vector<16, char>, int, _Constant char)">;
+}
+
+let Features = "crc32", Attributes = [NoThrow, Const] in {
+  def crc32qi : X86Builtin<"unsigned int(unsigned int, unsigned char)">;
+  def crc32hi : X86Builtin<"unsigned int(unsigned int, unsigned short)">;
+  def crc32si : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+}
+
+let Features = "sse4a", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def extrqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char, _Constant char)">;
+  def extrq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<16, char>)">;
+  def insertqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant char, _Constant char)">;
+  def insertq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "sse4a", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def movntsd : X86Builtin<"void(double *, _Vector<2, double>)">;
+  def movntss : X86Builtin<"void(float *, _Vector<4, float>)">;
+}
+
+let Features = "aes", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def aesenc128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def aesenclast128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def aesdec128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def aesdeclast128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def aesimc128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
+  def aeskeygenassist128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
+}
+
+let Features = "vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def aesenc256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512f,evex512,vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def aesenc512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def aesenclast256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512f,evex512,vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def aesenclast512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def aesdec256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512f,evex512,vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def aesdec512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def aesdeclast256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512f,evex512,vaes", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def aesdeclast512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vgf2p8affineinvqb_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+}
+
+let Features = "avx,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vgf2p8affineinvqb_v32qi : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
+}
+
+let Features = "avx512f,evex512,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vgf2p8affineinvqb_v64qi : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant char)">;
+}
+
+let Features = "gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vgf2p8affineqb_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+}
+
+let Features = "avx,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vgf2p8affineqb_v32qi : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
+}
+
+let Features = "avx512f,evex512,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vgf2p8affineqb_v64qi : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant char)">;
+}
+
+let Features = "gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vgf2p8mulb_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+}
+
+let Features = "avx,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vgf2p8mulb_v32qi : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+}
+
+let Features = "avx512f,evex512,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vgf2p8mulb_v64qi : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
+}
+
+let Features = "pclmul", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pclmulqdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant char)">;
+}
+
+let Features = "vpclmulqdq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pclmulqdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant char)">;
+}
+
+let Features = "avx512f,evex512,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
+  def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
+  def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
+  def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+  def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
+  def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
+  def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
+  def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
+  def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
+  def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">;
+  def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">;
+  def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">;
+  def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
+  def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
+  def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
+  def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
+  def cvttps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
+  def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpermilpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
+  def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+  def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
+  def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
+  def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
+  def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
+  def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
+  def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
+  def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
+  def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
+  def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+  def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
+  def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
+  def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
+  def vtestzps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
+  def vtestcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
+  def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
+  def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
+  def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
+  def vtestzps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">;
+  def vtestcps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">;
+  def vtestnzcps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">;
+  def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+  def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+  def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+  def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
+  def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow] in {
+  def vzeroall : X86Builtin<"void()">;
+  def vzeroupper : X86Builtin<"void()">;
+}
+
+let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def lddqu256 : X86Builtin<"_Vector<32, char>(char const *)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def maskloadpd : X86Builtin<"_Vector<2, double>(_Vector<2, double const *>, _Vector<2, long long int>)">;
+  def maskloadps : X86Builtin<"_Vector<4, float>(_Vector<4, float const *>, _Vector<4, int>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def maskloadpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double const *>, _Vector<4, long long int>)">;
+  def maskloadps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float const *>, _Vector<8, int>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def maskstorepd : X86Builtin<"void(_Vector<2, double *>, _Vector<2, long long int>, _Vector<2, double>)">;
+  def maskstoreps : X86Builtin<"void(_Vector<4, float *>, _Vector<4, int>, _Vector<4, float>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def maskstorepd256 : X86Builtin<"void(_Vector<4, double *>, _Vector<4, long long int>, _Vector<4, double>)">;
+  def maskstoreps256 : X86Builtin<"void(_Vector<8, float *>, _Vector<8, int>, _Vector<8, float>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vec_ext_v32qi : X86Builtin<"char(_Vector<32, char>, _Constant int)">;
+  def vec_ext_v16hi : X86Builtin<"short(_Vector<16, short>, _Constant int)">;
+  def vec_ext_v8si : X86Builtin<"int(_Vector<8, int>, _Constant int)">;
+  def vec_set_v32qi : X86Builtin<"_Vector<32, char>(_Vector<32, char>, char, _Constant int)">;
+  def vec_set_v16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, short, _Constant int)">;
+  def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
+  def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
+  def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
+  def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
+  def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
+  def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
+  def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+  def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
+  def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
+  def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+  def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+  def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
+  def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
+  def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
+  def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+  def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def pmulhuw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+  def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
+  def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+  def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
+  def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
+  def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
+  def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+  def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+  def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+  def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
+  def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+  def pslldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
+  def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+  def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+  def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
+  def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
+  def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
+  def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+  def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+  def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+  def psrldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
+  def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
+  def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+  def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
+  def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+  def psrlqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
+  def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
+  def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
+  def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
+  def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+  def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+  def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+  def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
+  def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
+  def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">;
+  def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def maskloadd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int const *>, _Vector<8, int>)">;
+  def maskloadq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int const *>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def maskloadd : X86Builtin<"_Vector<4, int>(_Vector<4, int const *>, _Vector<4, int>)">;
+  def maskloadq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int const *>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def maskstored256 : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, _Vector<8, int>)">;
+  def maskstoreq256 : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def maskstored : X86Builtin<"void(_Vector<4, int *>, _Vector<4, int>, _Vector<4, int>)">;
+  def maskstoreq : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psllv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psllv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psrav8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psrav4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherd_pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, double const *, _Vector<4, int>, _Vector<2, double>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherd_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, double const *, _Vector<4, int>, _Vector<4, double>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherq_pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, double const *, _Vector<2, long long int>, _Vector<2, double>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherq_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, double const *, _Vector<4, long long int>, _Vector<4, double>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherd_ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, float const *, _Vector<4, int>, _Vector<4, float>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherd_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, float const *, _Vector<8, int>, _Vector<8, float>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherq_ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, float const *, _Vector<2, long long int>, _Vector<4, float>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherq_ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, float const *, _Vector<4, long long int>, _Vector<4, float>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherd_q : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, long long int const *, _Vector<4, int>, _Vector<2, long long int>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherd_q256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, long long int const *, _Vector<4, int>, _Vector<4, long long int>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherq_q : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, long long int const *, _Vector<2, long long int>, _Vector<2, long long int>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherq_q256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, long long int const *, _Vector<4, long long int>, _Vector<4, long long int>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherd_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<4, int>, _Vector<4, int>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherd_d256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int const *, _Vector<8, int>, _Vector<8, int>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<2, long long int>, _Vector<4, int>, _Constant char)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def gatherq_d256 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<4, long long int>, _Vector<4, int>, _Constant char)">;
+}
+
+let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int)">;
+}
+
+let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
+}
+
+let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2ps : X86Builtin<"_Vector<4, float>(_Vector<8, short>)">;
+}
+
+let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, short>)">;
+}
+
+let Features = "rdrnd", Attributes = [NoThrow] in {
+  def rdrand16_step : X86Builtin<"unsigned int(unsigned short *)">;
+  def rdrand32_step : X86Builtin<"unsigned int(unsigned int *)">;
+}
+
+let Features = "fxsr", Attributes = [NoThrow] in {
+  def fxrstor : X86Builtin<"void(void *)">;
+  def fxsave : X86Builtin<"void(void *)">;
+}
+
+let Features = "xsave", Attributes = [NoThrow] in {
+  def xsave : X86Builtin<"void(void *, unsigned long long int)">;
+  def xrstor : X86Builtin<"void(void *, unsigned long long int)">;
+  def xgetbv : X86Builtin<"unsigned long long int(unsigned int)">;
+}
+
+let Header = "immintrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def _xgetbv : X86LibBuiltin<"uint64_t(unsigned int)">;
+}
+
+let Features = "xsave", Attributes = [NoThrow] in {
+  def xsetbv : X86Builtin<"void(unsigned int, unsigned long long int)">;
+}
+
+let Header = "immintrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def _xsetbv : X86LibBuiltin<"void(unsigned int, uint64_t)">;
+}
+
+let Features = "xsaveopt", Attributes = [NoThrow] in {
+  def xsaveopt : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "xsaves", Attributes = [NoThrow] in {
+  def xrstors : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "xsavec", Attributes = [NoThrow] in {
+  def xsavec : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "xsaves", Attributes = [NoThrow] in {
+  def xsaves : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "shstk", Attributes = [NoThrow] in { // "shstk"-gated; NoThrow only. The value-carrying entries in this group use unsigned int operands.
+  def incsspd : X86Builtin<"void(unsigned int)">;
+  def rdsspd : X86Builtin<"unsigned int(unsigned int)">;
+  def saveprevssp : X86Builtin<"void()">;
+  def rstorssp : X86Builtin<"void(void *)">;
+  def wrssd : X86Builtin<"void(unsigned int, void *)">;
+  def wrussd : X86Builtin<"void(unsigned int, void *)">;
+  def setssbsy : X86Builtin<"void()">;
+  def clrssbsy : X86Builtin<"void(void *)">;
+}
+
+let Features = "clflushopt", Attributes = [NoThrow] in { // Gated on "clflushopt"; takes a pointer-to-const and returns nothing.
+  def clflushopt : X86Builtin<"void(void const *)">;
+}
+
+let Features = "clwb", Attributes = [NoThrow] in { // Gated on "clwb"; takes a pointer-to-const and returns nothing.
+  def clwb : X86Builtin<"void(void const *)">;
+}
+
+let Attributes = [NoThrow] in { // No Features key in this block, so no feature string is attached here (class default applies; presumably empty, confirm).
+  def wbinvd : X86Builtin<"void()">;
+}
+
+let Features = "wbnoinvd", Attributes = [NoThrow] in { // Gated on "wbnoinvd"; no operands, no result.
+  def wbnoinvd : X86Builtin<"void()">;
+}
+
+let Attributes = [NoThrow, Constexpr] in { // No Features key; Constexpr but not Const. Shape: (unsigned char, unsigned int, unsigned int, unsigned int*) -> unsigned char; presumably carry in/out plus a result pointer, confirm.
+  def addcarryx_u32 : X86Builtin<"unsigned char(unsigned char, unsigned int, unsigned int, unsigned int *)">;
+  def subborrow_u32 : X86Builtin<"unsigned char(unsigned char, unsigned int, unsigned int, unsigned int *)">;
+}
+
+let Features = "rdseed", Attributes = [NoThrow] in { // "rdseed"-gated; NoThrow only. Each takes a non-const pointer and returns unsigned int.
+  def rdseed16_step : X86Builtin<"unsigned int(unsigned short *)">;
+  def rdseed32_step : X86Builtin<"unsigned int(unsigned int *)">;
+}
+
+let Features = "lzcnt", Attributes = [NoThrow, Const, Constexpr] in { // "lzcnt"-gated; Const and Constexpr. 16- and 32-bit unsigned forms.
+  def lzcnt_u16 : X86Builtin<"unsigned short(unsigned short)">;
+  def lzcnt_u32 : X86Builtin<"unsigned int(unsigned int)">;
+}
+
+let Features = "bmi", Attributes = [NoThrow, Const, Constexpr] in { // "bmi"-gated; Const and Constexpr.
+  def bextr_u32 : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+}
+
+let Attributes = [NoThrow, Const, Constexpr] in { // No Features key in this block (class default applies; presumably empty, confirm); Const and Constexpr.
+  def tzcnt_u16 : X86Builtin<"unsigned short(unsigned short)">;
+  def tzcnt_u32 : X86Builtin<"unsigned int(unsigned int)">;
+}
+
+let Features = "bmi2", Attributes = [NoThrow, Const, Constexpr] in { // "bmi2"-gated; all three are (unsigned int, unsigned int) -> unsigned int.
+  def bzhi_si : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def pdep_si : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def pext_si : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+}
+
+let Features = "tbm", Attributes = [NoThrow, Const, Constexpr] in { // "tbm"-gated; the second operand is marked _Constant.
+  def bextri_u32 : X86Builtin<"unsigned int(unsigned int, _Constant unsigned int)">;
+}
+
+let Features = "lwp", Attributes = [NoThrow] in { // "lwp"-gated; NoThrow only. lwpins32 and lwpval32 end with a _Constant operand.
+  def llwpcb : X86Builtin<"void(void *)">;
+  def slwpcb : X86Builtin<"void *()">;
+  def lwpins32 : X86Builtin<"unsigned char(unsigned int, unsigned int, _Constant unsigned int)">;
+  def lwpval32 : X86Builtin<"void(unsigned int, unsigned int, _Constant unsigned int)">;
+}
+
+let Features = "sha", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "sha"-gated, 128-bit; every vector operand and result is <4 x int>. Only sha1rnds4 has a trailing _Constant char.
+  def sha1rnds4 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant char)">;
+  def sha1nexte : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def sha1msg1 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def sha1msg2 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def sha256rnds2 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+  def sha256msg1 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def sha256msg2 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // Feature string lists two alternatives, "fma" and "fma4" (presumably either one suffices, confirm); 128-bit forms.
+  def vfmaddps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
+  def vfmaddpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
+}
+
+let Features = "fma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // Gated on "fma" only; 128-bit, three same-typed vector operands.
+  def vfmaddss3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
+  def vfmaddsd3 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
+}
+
+let Features = "fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // Gated on "fma4" only; 128-bit, three same-typed vector operands.
+  def vfmaddss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
+  def vfmaddsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
+}
+
+let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "fma|fma4" alternatives (presumably either suffices, confirm); 128-bit vfmaddsub forms.
+  def vfmaddsubps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
+  def vfmaddsubpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
+}
+
+let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "fma|fma4" alternatives (presumably either suffices, confirm); 256-bit forms over <8 x float> and <4 x double>.
+  def vfmaddps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
+  def vfmaddpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
+  def vfmaddsubps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
+  def vfmaddsubpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit: three vector operands, then unsigned char (8-lane double) or unsigned short (16-lane float), then a _Constant int.
+  def vfmaddpd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmaddpd512_maskz : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmaddpd512_mask3 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmsubpd512_mask3 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmaddps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmaddps512_maskz : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmaddps512_mask3 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmsubps512_mask3 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmaddsubpd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmaddsubpd512_maskz : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmaddsubpd512_mask3 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmsubaddpd512_mask3 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def vfmaddsubps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmaddsubps512_maskz : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmaddsubps512_mask3 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmsubaddps512_mask3 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "xop"-gated, 128-bit group: vpmacs*/vpmadcs*, vphadd*/vphsub*, vpperm, vprot*, vpsh*, vpcom*, vpermil2pd.
+  def vpmacssww : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
+  def vpmacsww : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
+  def vpmacsswd : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>, _Vector<4, int>)">;
+  def vpmacswd : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>, _Vector<4, int>)">;
+  def vpmacssdd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+  def vpmacsdd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+  def vpmacssdql : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>, _Vector<2, long long int>)">;
+  def vpmacsdql : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>, _Vector<2, long long int>)">;
+  def vpmacssdqh : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>, _Vector<2, long long int>)">;
+  def vpmacsdqh : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>, _Vector<2, long long int>)">;
+  def vpmadcsswd : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>, _Vector<4, int>)">;
+  def vpmadcswd : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>, _Vector<4, int>)">;
+  def vphaddbw : X86Builtin<"_Vector<8, short>(_Vector<16, char>)">;
+  def vphaddbd : X86Builtin<"_Vector<4, int>(_Vector<16, char>)">;
+  def vphaddbq : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>)">;
+  def vphaddwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
+  def vphaddwq : X86Builtin<"_Vector<2, long long int>(_Vector<8, short>)">;
+  def vphadddq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
+  def vphaddubw : X86Builtin<"_Vector<8, short>(_Vector<16, char>)">;
+  def vphaddubd : X86Builtin<"_Vector<4, int>(_Vector<16, char>)">;
+  def vphaddubq : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>)">;
+  def vphadduwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
+  def vphadduwq : X86Builtin<"_Vector<2, long long int>(_Vector<8, short>)">;
+  def vphaddudq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
+  def vphsubbw : X86Builtin<"_Vector<8, short>(_Vector<16, char>)">;
+  def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
+  def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
+  def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
+  def vprotb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+  def vprotw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+  def vprotd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def vprotq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
+  def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
+  def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
+  def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
+  def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+  def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+  def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def vpshlq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def vpshab : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+  def vpshaw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+  def vpshad : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+  def vpshaq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def vpcomub : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def vpcomuw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant char)">;
+  def vpcomud : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant char)">;
+  def vpcomuq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant char)">;
+  def vpcomb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
+  def vpcomw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant char)">;
+  def vpcomd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant char)">;
+  def vpcomq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant char)">;
+  def vpermil2pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, long long int>, _Constant char)">;
+}
+
+let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "xop"-gated, 256-bit: <4 x double> operands with a <4 x long long int> third operand and a _Constant char.
+  def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">;
+}
+
+let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "xop"-gated, 128-bit: <4 x float> operands with a <4 x int> third operand and a _Constant char.
+  def vpermil2ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, int>, _Constant char)">;
+}
+
+let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "xop"-gated, 256-bit: <8 x float> operands with an <8 x int> third operand and a _Constant char.
+  def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">;
+}
+
+let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "xop"-gated, 128-bit vfrcz* entries: one operand, result has the same vector type.
+  def vfrczss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def vfrczsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
+  def vfrczps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+  def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
+}
+
+let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "xop"-gated, 256-bit vfrcz* entries: one operand, result has the same vector type.
+  def vfrczps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
+  def vfrczpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
+}
+
+let Features = "rtm", Attributes = [NoThrow] in { // "rtm"-gated; NoThrow only. xabort's single operand is a _Constant char.
+  def xbegin : X86Builtin<"int()">;
+  def xend : X86Builtin<"void()">;
+  def xabort : X86Builtin<"void(_Constant char)">;
+  def xtest : X86Builtin<"int()">;
+}
+
+let Features = "rdpid", Attributes = [NoThrow] in { // Gated on "rdpid"; no operands, returns unsigned int.
+  def rdpid : X86Builtin<"unsigned int()">;
+}
+
+let Features = "rdpru", Attributes = [NoThrow] in { // Gated on "rdpru"; takes an int and returns unsigned long long.
+  def rdpru : X86Builtin<"unsigned long long int(int)">;
+}
+
+let Features = "pku", Attributes = [NoThrow] in { // "pku"-gated pair: rdpkru returns unsigned int, wrpkru accepts one.
+  def rdpkru : X86Builtin<"unsigned int()">;
+  def wrpkru : X86Builtin<"void(unsigned int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit; trailing _Constant int operand (presumably a rounding-control immediate, confirm).
+  def sqrtpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
+  def sqrtps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "avx512f" without "evex512"; 128-bit, three vector operands followed by an unsigned char.
+  def rsqrt14sd_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char)">;
+  def rsqrt14ss_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit; two vector operands, then unsigned char (8 lanes) or unsigned short (16 lanes).
+  def rsqrt14pd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
+  def rsqrt14ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, unsigned short)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "avx512f" without "evex512"; 128-bit, three vector operands followed by an unsigned char.
+  def rcp14sd_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char)">;
+  def rcp14ss_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit group: rcp14*, cvtt* and cmpps512_mask. The cvttpd2* entries return <8 x int>, i.e. half the input width.
+  def rcp14pd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
+  def rcp14ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, unsigned short)">;
+  def cvttps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
+  def cvttps2udq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
+  def cvttpd2dq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def cvttpd2udq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def cmpps512_mask : X86Builtin<"unsigned short(_Vector<16, float>, _Vector<16, float>, _Constant int, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "avx512vl"-gated, 256-bit compare: (vec, vec, _Constant int, unsigned char) -> unsigned char.
+  def cmpps256_mask : X86Builtin<"unsigned char(_Vector<8, float>, _Vector<8, float>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "avx512vl"-gated, 128-bit compare: (vec, vec, _Constant int, unsigned char) -> unsigned char.
+  def cmpps128_mask : X86Builtin<"unsigned char(_Vector<4, float>, _Vector<4, float>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit compare; has two _Constant int operands (third and last).
+  def cmppd512_mask : X86Builtin<"unsigned char(_Vector<8, double>, _Vector<8, double>, _Constant int, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "avx512vl"-gated, 256-bit compare: (vec, vec, _Constant int, unsigned char) -> unsigned char.
+  def cmppd256_mask : X86Builtin<"unsigned char(_Vector<4, double>, _Vector<4, double>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "avx512vl"-gated, 128-bit compare: (vec, vec, _Constant int, unsigned char) -> unsigned char.
+  def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit group: rndscale*, cvt* conversions, min/max, and pmul(u)dq512. Several results are narrower than 512 bits (e.g. <8 x int>, <8 x float>).
+  def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
+  def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
+  def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
+  def cvtpd2dq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def cvtps2udq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
+  def cvtpd2udq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def minps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+  def minpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def maxps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+  def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
+  def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
+  def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { // 512-bit load*/store* entries; note the attribute list omits Const, unlike the neighbouring arithmetic groups.
+  def loaddqusi512_mask : X86Builtin<"_Vector<16, int>(int const *, _Vector<16, int>, unsigned short)">;
+  def loaddqudi512_mask : X86Builtin<"_Vector<8, long long int>(long long int const *, _Vector<8, long long int>, unsigned char)">;
+  def loadups512_mask : X86Builtin<"_Vector<16, float>(float const *, _Vector<16, float>, unsigned short)">;
+  def loadaps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float const *>, _Vector<16, float>, unsigned short)">; // NOTE(review): first operand is spelled _Vector<16, float const *>, which reads as a vector of pointers; presumably a pointer to a const vector is meant. Confirm the emitted encoding.
+  def loadupd512_mask : X86Builtin<"_Vector<8, double>(double const *, _Vector<8, double>, unsigned char)">;
+  def loadapd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double const *>, _Vector<8, double>, unsigned char)">; // NOTE(review): same pointer-inside-_Vector spelling as loadaps512_mask; confirm.
+  def storedqudi512_mask : X86Builtin<"void(long long int *, _Vector<8, long long int>, unsigned char)">;
+  def storedqusi512_mask : X86Builtin<"void(int *, _Vector<16, int>, unsigned short)">;
+  def storeupd512_mask : X86Builtin<"void(double *, _Vector<8, double>, unsigned char)">;
+  def storeapd512_mask : X86Builtin<"void(_Vector<8, double *>, _Vector<8, double>, unsigned char)">; // NOTE(review): pointer-inside-_Vector spelling; presumably a pointer to a vector. Confirm.
+  def storeups512_mask : X86Builtin<"void(float *, _Vector<16, float>, unsigned short)">;
+  def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">; // NOTE(review): pointer-inside-_Vector spelling; presumably a pointer to a vector. Confirm.
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit align* entries: two same-typed vectors plus a _Constant int.
+  def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+  def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "avx512vl"-gated, 128-bit alignd over <4 x int>.
+  def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "avx512vl"-gated, 256-bit alignd over <8 x int>.
+  def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // "avx512vl"-gated, 128-bit alignq over <2 x long long int>.
+  def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // "avx512vl"-gated, 256-bit alignq over <4 x long long int>.
+  def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit source, narrower result: (source vector, _Constant int, result-typed vector, unsigned char).
+  def extractf64x4_mask : X86Builtin<"_Vector<4, double>(_Vector<8, double>, _Constant int, _Vector<4, double>, unsigned char)">;
+  def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbusd; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbusd; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vnni,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit vpdpbusd; gated on "avx512vnni,evex512" with no avxvnni alternative.
+  def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbusds; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbusds; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vnni,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit vpdpbusds; gated on "avx512vnni,evex512" with no avxvnni alternative.
+  def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpwssd; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpwssd; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vnni,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit vpdpwssd; gated on "avx512vnni,evex512" with no avxvnni alternative.
+  def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpwssds; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpwssds; gate mixes ',' and '|' (presumably avx512vl+avx512vnni, or avxvnni alone; confirm precedence).
+  def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vnni,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit vpdpwssds; gated on "avx512vnni,evex512" with no avxvnni alternative.
+  def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbssd; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbssd; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbssds; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbssds; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbsud; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbsud; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbsuds; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbsuds; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbuud; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbuud; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128-bit vpdpbuuds; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint8|avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit vpdpbuuds; feature string lists two alternatives, "avxvnniint8" and "avx10.2-256".
+  def vpdpbuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "movrs", Attributes = [NoThrow, Const] in { // "movrs"-gated. NOTE(review): marked Const although it returns void; presumably carried over unchanged from the .def entry. Confirm this is intended.
+  def prefetchrs : X86Builtin<"void(void const *)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // "avx512vl", 128-bit, not Const. Operands: result-typed vector, void const*, <2 x long long int>, unsigned char, _Constant int.
+  def gather3div2df : X86Builtin<"_Vector<2, double>(_Vector<2, double>, void const *, _Vector<2, long long int>, unsigned char, _Constant int)">;
+  def gather3div2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, void const *, _Vector<2, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { // "avx512vl", 256-bit, not Const. Operands: result-typed vector, void const*, <4 x long long int>, unsigned char, _Constant int.
+  def gather3div4df : X86Builtin<"_Vector<4, double>(_Vector<4, double>, void const *, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def gather3div4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, void const *, _Vector<4, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // "avx512vl", 128-bit, not Const. 4-lane 32-bit data with a <2 x long long int> third operand.
+  def gather3div4sf : X86Builtin<"_Vector<4, float>(_Vector<4, float>, void const *, _Vector<2, long long int>, unsigned char, _Constant int)">;
+  def gather3div4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, void const *, _Vector<2, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { // "avx512vl", width 256, not Const. The data vectors are only 4 x 32-bit; the 256-bit operand is the <4 x long long int> third argument.
+  def gather3div8sf : X86Builtin<"_Vector<4, float>(_Vector<4, float>, void const *, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def gather3div8si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, void const *, _Vector<4, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // "avx512vl", 128-bit, not Const. 2-lane 64-bit data with a <4 x int> third operand.
+  def gather3siv2df : X86Builtin<"_Vector<2, double>(_Vector<2, double>, void const *, _Vector<4, int>, unsigned char, _Constant int)">;
+  def gather3siv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, void const *, _Vector<4, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { // "avx512vl", 256-bit, not Const. 4-lane 64-bit data with a <4 x int> third operand.
+  def gather3siv4df : X86Builtin<"_Vector<4, double>(_Vector<4, double>, void const *, _Vector<4, int>, unsigned char, _Constant int)">;
+  def gather3siv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, void const *, _Vector<4, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // "avx512vl", 128-bit, not Const. 4-lane 32-bit data with a <4 x int> third operand.
+  def gather3siv4sf : X86Builtin<"_Vector<4, float>(_Vector<4, float>, void const *, _Vector<4, int>, unsigned char, _Constant int)">;
+  def gather3siv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, void const *, _Vector<4, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { // "avx512vl", 256-bit, not Const. 8-lane 32-bit data with an <8 x int> third operand.
+  def gather3siv8sf : X86Builtin<"_Vector<8, float>(_Vector<8, float>, void const *, _Vector<8, int>, unsigned char, _Constant int)">;
+  def gather3siv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, void const *, _Vector<8, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { // 512-bit gather*/scatter* entries, not Const. Gathers lead with a result-typed vector; scatters lead with (void*, integer) and return void.
+  def gathersiv8df : X86Builtin<"_Vector<8, double>(_Vector<8, double>, void const *, _Vector<8, int>, unsigned char, _Constant int)">;
+  def gathersiv16sf : X86Builtin<"_Vector<16, float>(_Vector<16, float>, void const *, _Vector<16, int>, unsigned short, _Constant int)">;
+  def gatherdiv8df : X86Builtin<"_Vector<8, double>(_Vector<8, double>, void const *, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def gatherdiv16sf : X86Builtin<"_Vector<8, float>(_Vector<8, float>, void const *, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def gathersiv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, void const *, _Vector<8, int>, unsigned char, _Constant int)">;
+  def gathersiv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, void const *, _Vector<16, int>, unsigned short, _Constant int)">;
+  def gatherdiv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, void const *, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def gatherdiv16si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, void const *, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def scattersiv8df : X86Builtin<"void(void *, unsigned char, _Vector<8, int>, _Vector<8, double>, _Constant int)">;
+  def scattersiv16sf : X86Builtin<"void(void *, unsigned short, _Vector<16, int>, _Vector<16, float>, _Constant int)">;
+  def scatterdiv8df : X86Builtin<"void(void *, unsigned char, _Vector<8, long long int>, _Vector<8, double>, _Constant int)">;
+  def scatterdiv16sf : X86Builtin<"void(void *, unsigned char, _Vector<8, long long int>, _Vector<8, float>, _Constant int)">;
+  def scattersiv8di : X86Builtin<"void(void *, unsigned char, _Vector<8, int>, _Vector<8, long long int>, _Constant int)">;
+  def scattersiv16si : X86Builtin<"void(void *, unsigned short, _Vector<16, int>, _Vector<16, int>, _Constant int)">;
+  def scatterdiv8di : X86Builtin<"void(void *, unsigned char, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+  def scatterdiv16si : X86Builtin<"void(void *, unsigned char, _Vector<8, long long int>, _Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in {  // Integer-only prototype, no RequiredVectorWidth; the unsigned char form needs avx512dq.
+  def knotqi : X86Builtin<"unsigned char(unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {  // The unsigned short form only needs base avx512f.
+  def knothi : X86Builtin<"unsigned short(unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {  // The unsigned int and unsigned long long forms need avx512bw.
+  def knotsi : X86Builtin<"unsigned int(unsigned int)">;
+  def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {  // char lanes: avx512vl + avx512bw.
+  def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {  // int / long long lanes: avx512vl alone; 4- and 2-lane forms still use unsigned char.
+  def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
+  def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {  // short lanes: avx512vl + avx512bw.
+  def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {  // char lanes: avx512vl + avx512bw.
+  def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {  // int / long long lanes: avx512vl alone; the 4-lane form still uses unsigned char.
+  def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
+  def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {  // short lanes: avx512vl + avx512bw.
+  def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {  // 512-bit char lanes: avx512bw + evex512 (no avx512vl).
+  def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {  // 512-bit int / long long lanes: avx512f + evex512.
+  def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
+  def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {  // 512-bit short lanes: avx512bw + evex512.
+  def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {  // Same prototype, features and attributes as cmpb128_mask; only the name differs.
+  def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
+  def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
+  def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
+  def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
+  def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
+  def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
+  def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
+  def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
+  def pavgb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
+  def pavgw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+  def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
+}
+
+let Features = "avx512cd,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
+  def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
+  def vplzcntd_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
+  def vplzcntq_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
+}
+
+let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshufbitqmb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshufbitqmb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512bitalg,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+  def pmulhuw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+  def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def addpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def addps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+  def divpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def divps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+  def mulpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def mulps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+  def subpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def subps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
+  def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {  // avx512f without evex512: every operand here is a 128-bit vector.
+  def addss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def divss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def mulss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def subss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def maxss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def minss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def addsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def divsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def mulsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def subsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def maxsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def minsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def compressdf128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def compressdf256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def compressdi128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def compressdi256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def compresshi128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def compresshi256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def compressqi128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def compressqi256_mask : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def compresssf128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def compresssf256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def compresssi128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def compresssi256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Not Const, unlike the compress*_mask groups before it.
+  def compressstoredf128_mask : X86Builtin<"void(_Vector<2, double *>, _Vector<2, double>, unsigned char)">;  // NOTE(review): `_Vector<2, double *>` presumably encodes a pointer to a 2 x double vector, not a vector of pointers; confirm against the prototype parser.
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def compressstoredf256_mask : X86Builtin<"void(_Vector<4, double *>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def compressstoredi128_mask : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def compressstoredi256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def compressstorehi128_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def compressstorehi256_mask : X86Builtin<"void(_Vector<16, short *>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def compressstoreqi128_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def compressstoreqi256_mask : X86Builtin<"void(_Vector<32, char *>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def compressstoresf128_mask : X86Builtin<"void(_Vector<4, float *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def compressstoresf256_mask : X86Builtin<"void(_Vector<8, float *>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def compressstoresi128_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {  // 2 x double source, but result and second operand are 4-lane 128-bit vectors.
+  def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
+  def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
+  def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {  // Width 256 comes from the 4 x double source; the result is only 4 x int.
+  def cvtpd2udq256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtps2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, float>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtps2udq256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvttpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
+  def cvttpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvttpd2udq256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvttps2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, float>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvttps2udq256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def expanddf128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def expanddf256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def expanddi128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def expanddi256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def expandhi128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def expandhi256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def expandqi128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def expandqi256_mask : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def expandloaddf128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double const *>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def expandloaddf256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double const *>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def expandloaddi128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, long long int const *>, _Vector<2, long long int>, unsigned char)">;  // NOTE(review): returns _Vector<4, int> while the second operand is _Vector<2, long long int>; expandloaddi256_mask returns the long long type. Presumably carried over from the .def; confirm before changing.
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def expandloaddi256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int const *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def expandloadhi128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short const *>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def expandloadhi256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short const *>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def expandloadqi128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char const *>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def expandloadqi256_mask : X86Builtin<"_Vector<32, char>(_Vector<32, char const *>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def expandloadsf128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float const *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def expandloadsf256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float const *>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def expandloadsi128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int const *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def expandloadsi256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int const *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def expandsf128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def expandsf256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def expandsi128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def expandsi256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def getexppd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def getexppd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def getexpps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def getexpps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rndscalepd_128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rndscalepd_256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rndscaleps_128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rndscaleps_256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def scalefpd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def scalefpd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def scalefps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def scalefps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def scatterdiv2df : X86Builtin<"void(void *, unsigned char, _Vector<2, long long int>, _Vector<2, double>, _Constant int)">;
+  def scatterdiv2di : X86Builtin<"void(void *, unsigned char, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def scatterdiv4df : X86Builtin<"void(void *, unsigned char, _Vector<4, long long int>, _Vector<4, double>, _Constant int)">;
+  def scatterdiv4di : X86Builtin<"void(void *, unsigned char, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Index vector has 2 x long long lanes while the data vector has 4 lanes.
+  def scatterdiv4sf : X86Builtin<"void(void *, unsigned char, _Vector<2, long long int>, _Vector<4, float>, _Constant int)">;
+  def scatterdiv4si : X86Builtin<"void(void *, unsigned char, _Vector<2, long long int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // "8" in the names, but data is 4 lanes; width 256 comes from the 4 x long long index vector.
+  def scatterdiv8sf : X86Builtin<"void(void *, unsigned char, _Vector<4, long long int>, _Vector<4, float>, _Constant int)">;
+  def scatterdiv8si : X86Builtin<"void(void *, unsigned char, _Vector<4, long long int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Index vector is 4 x int while the data vector has only 2 lanes.
+  def scattersiv2df : X86Builtin<"void(void *, unsigned char, _Vector<4, int>, _Vector<2, double>, _Constant int)">;
+  def scattersiv2di : X86Builtin<"void(void *, unsigned char, _Vector<4, int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def scattersiv4df : X86Builtin<"void(void *, unsigned char, _Vector<4, int>, _Vector<4, double>, _Constant int)">;
+  def scattersiv4di : X86Builtin<"void(void *, unsigned char, _Vector<4, int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def scattersiv4sf : X86Builtin<"void(void *, unsigned char, _Vector<4, int>, _Vector<4, float>, _Constant int)">;
+  def scattersiv4si : X86Builtin<"void(void *, unsigned char, _Vector<4, int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def scattersiv8sf : X86Builtin<"void(void *, unsigned char, _Vector<8, int>, _Vector<8, float>, _Constant int)">;
+  def scattersiv8si : X86Builtin<"void(void *, unsigned char, _Vector<8, int>, _Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpermi2vard128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermi2vard256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpermi2vard512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpermi2varpd128 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>, _Vector<2, double>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermi2varpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>, _Vector<4, double>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpermi2varpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>, _Vector<8, double>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpermi2varps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>, _Vector<4, float>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermi2varps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>, _Vector<8, float>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpermi2varps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>, _Vector<16, float>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vpermi2varq{128,256,512}: all three operands and the result share one long long int vector type.
+  def vpermi2varq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermi2varq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpermi2varq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vpermi2varqi{128,256,512}: char-lane forms; every Features string here names avx512vbmi.
+  def vpermi2varqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
+}
+
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermi2varqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
+}
+
+let Features = "avx512vbmi,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpermi2varqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Vector<64, char>)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vpermi2varhi{128,256,512}: short-lane forms; every Features string here names avx512bw.
+  def vpermi2varhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpermi2varhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vpshld{d,q,w}{128,256,512}: two vectors of the result type followed by a `_Constant int` (presumably the shift immediate - confirm).
+  def vpshldd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshldw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vpshldv{d,q,w}{128,256,512}: three vectors of the result type; unlike the non-"v" vpshld* names, there is no `_Constant int` operand.
+  def vpshldvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshldvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshldvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshldvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshldvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshldvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vpshrdv{d,q,w}{128,256,512}: three vectors of the result type, no `_Constant int` operand.
+  def vpshrdvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshrdvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshrdvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshrdvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshrdvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshrdvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshrdvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshrdvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshrdvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vpshrd{d,q,w}{128,256,512}: two vectors of the result type followed by a `_Constant int` (presumably the shift immediate - confirm).
+  def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshrdd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshrdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshrdw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpshrdw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit pmov{s,us,}wb masked forms: 32 short lanes in, 32 char lanes out; operand 2 has the result type and operand 3 is `unsigned int` (presumably pass-through value and lane mask - confirm).
+  def pmovswb512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, short>, _Vector<32, char>, unsigned int)">;
+  def pmovuswb512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, short>, _Vector<32, char>, unsigned int)">;
+  def pmovwb512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, short>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // Masked cvt{pd,ps}2{qq,uqq} at 128/256 bits: every prototype is (source, vector of the result type, unsigned char).
+  def cvtpd2qq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtpd2qq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtpd2uqq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtpd2uqq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // The cvtps* sources are _Vector<4, float> at both widths, including this 2-lane-result form.
+  def cvtps2qq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, float>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtps2qq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtps2uqq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, float>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtps2uqq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // cvtqq2ps128_mask plus the masked cvtt{pd,ps}2{qq,uqq} 128/256-bit forms: every prototype is (source, vector of the result type, unsigned char).
+  def cvtqq2ps128_mask : X86Builtin<"_Vector<4, float>(_Vector<2, long long int>, _Vector<4, float>, unsigned char)">;
+  def cvttpd2qq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvttpd2qq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvttpd2uqq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvttpd2uqq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // The cvttps* sources are _Vector<4, float> at both widths.
+  def cvttps2qq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, float>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvttps2qq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvttps2uqq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, float>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvttps2uqq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // cvtuqq2ps128_mask shares this block with rangepd128_mask (same Features/Attributes). range{pd,ps}{128,256}_mask prototypes are (a, b, _Constant int, vector of the result type, unsigned char).
+  def cvtuqq2ps128_mask : X86Builtin<"_Vector<4, float>(_Vector<2, long long int>, _Vector<4, float>, unsigned char)">;
+  def rangepd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rangepd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rangeps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rangeps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // rangesd/rangess: Features is avx512dq alone, and the operand order differs from the packed forms - three vectors, unsigned char, then two `_Constant int`.
+  def rangesd128_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int, _Constant int)">;
+  def rangess128_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // reduce{pd,ps}{128,256}_mask: (source, _Constant int, vector of the result type, unsigned char).
+  def reducepd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def reducepd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def reduceps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def reduceps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // reducesd/reducess: Features is avx512dq alone; three vectors, unsigned char, then two `_Constant int`.
+  def reducesd_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int, _Constant int)">;
+  def reducess_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int, _Constant int)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // 128/256-bit pmov{s,us,}wb masked forms: the result is _Vector<16, char> at both widths, including the 128-bit forms whose source has only 8 short lanes.
+  def pmovswb128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // The 256-bit forms take an unsigned short as the last operand; the 128-bit forms take unsigned char.
+  def pmovswb256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, short>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovuswb128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovuswb256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, short>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // NOTE(review): no pmovwb256_mask follows in this run of blocks - confirm whether that is intentional.
+  def pmovwb128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit avx512dq masked forms: every prototype ends in a `_Constant int` (presumably rounding/SAE control - confirm). The *ps2*qq sources and *qq2ps results are _Vector<8, float>; rangeps/reduceps use 16 float lanes with an unsigned short mask operand.
+  def cvtpd2qq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, double>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvtpd2uqq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, double>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvtps2qq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, float>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvtps2uqq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, float>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvtqq2pd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, long long int>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def cvtqq2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, long long int>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def cvttpd2qq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, double>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvttpd2uqq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, double>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvttps2qq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, float>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvttps2uqq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, float>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+  def cvtuqq2pd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, long long int>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def cvtuqq2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, long long int>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def rangepd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
+  def rangeps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
+  def reducepd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
+  def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // prold/prolq: (vector, _Constant int) -> same vector type. The 512-bit forms come first, then the 128/256-bit forms under avx512vl.
+  def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
+  def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // prolv{d,q}: two vectors of the result type. This 512-bit block also holds prord512/prorq512, which take (vector, _Constant int).
+  def prolvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
+  def prolvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+  def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
+  def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def prolvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prolvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def prolvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prolvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // prord/prorq, 128/256-bit forms: (vector, _Constant int) -> same vector type.
+  def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // prorv{d,q}{512,128,256}: two vectors of the result type.
+  def prorvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
+  def prorvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def prorvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prorvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def prorvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def prorvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // Second-operand kinds differ per def: pshufhw512/pshuflw512 take `_Constant int`, psllv32hi a 32-lane vector, psllw512 a _Vector<8, short>, and psllwi512 a plain `int`.
+  def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
+  def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
+  def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+  def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
+  def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psllv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psllv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // pslldi512/psllqi512: the second operand is a plain `int`, not `_Constant int`.
+  def pslldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
+  def psllqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // psrlv{32,16,8}hi: two short vectors of the result type, one block per width.
+  def psrlv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psrlv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // psrldi512/psrlqi512: the second operand is a plain `int`, not `_Constant int`.
+  def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
+  def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // psrav{32,16,8}hi: two short vectors of the result type, one block per width.
+  def psrav32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psrav16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psrav8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // psravq128/256: long long lanes; Features is avx512vl alone (no avx512bw).
+  def psravq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // psraw512/psrlw512 take a _Vector<8, short> second operand; the *wi512 forms take a plain `int`; the *dqi512_byteshift forms use long long lanes with a `_Constant int`.
+  def psraw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
+  def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
+  def psrlw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
+  def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
+  def pslldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
+  def psrldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // Masked movdqa32/64 load/store forms: Attributes omit Const and the first operand is a pointer. NOTE(review): the `*` is written inside the _Vector<...> brackets; presumably this denotes a pointer to the vector type - confirm against the emitter's prototype parser.
+  def movdqa32load128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int const *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def movdqa32load256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int const *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { // Loads take a const pointee and return the vector; stores take a non-const pointee and return void. Only the 16-lane forms use an unsigned short last operand.
+  def movdqa32load512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int const *>, _Vector<16, int>, unsigned short)">;
+  def movdqa32store512_mask : X86Builtin<"void(_Vector<16, int *>, _Vector<16, int>, unsigned short)">;
+  def movdqa64load512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int const *>, _Vector<8, long long int>, unsigned char)">;
+  def movdqa64store512_mask : X86Builtin<"void(_Vector<8, long long int *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def movdqa32store128_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def movdqa32store256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def movdqa64load128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int const *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def movdqa64load256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int const *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def movdqa64store128_mask : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def movdqa64store256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512ifma,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // vpmadd52{h,l}uq{512,128,256}: three long long int vectors of the result type.
+  def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+  def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // NOTE(review): the 128/256-bit Features strings contain `|`; presumably this reads as (avx512ifma AND avx512vl) OR avxifma - confirm against Clang's required-features syntax.
+  def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcomisd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>, _Constant int, _Constant int)">;
+  def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+  def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def loaddquhi512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short const *>, _Vector<32, short>, unsigned int)">;
+  def loaddquqi512_mask : X86Builtin<"_Vector<64, char>(_Vector<64, char const *>, _Vector<64, char>, unsigned long long int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def fixupimmpd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, long long int>, _Constant int, unsigned char, _Constant int)">;
+  def fixupimmpd512_maskz : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, long long int>, _Constant int, unsigned char, _Constant int)">;
+  def fixupimmps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, int>, _Constant int, unsigned short, _Constant int)">;
+  def fixupimmps512_maskz : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, int>, _Constant int, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def fixupimmsd_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, long long int>, _Constant int, unsigned char, _Constant int)">;
+  def fixupimmsd_maskz : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, long long int>, _Constant int, unsigned char, _Constant int)">;
+  def fixupimmss_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, int>, _Constant int, unsigned char, _Constant int)">;
+  def fixupimmss_maskz : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, int>, _Constant int, unsigned char, _Constant int)">;
+  def getexpsd128_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def getexpss128_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def getmantsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int, _Vector<2, double>, unsigned char, _Constant int)">;
+  def getmantss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int, _Vector<4, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loaddquhi128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short const *>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loaddquhi256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short const *>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loaddquqi128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char const *>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loaddquqi256_mask : X86Builtin<"_Vector<32, char>(_Vector<32, char const *>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def fixupimmpd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, long long int>, _Constant int, unsigned char)">;
+  def fixupimmpd128_maskz : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def fixupimmpd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant int, unsigned char)">;
+  def fixupimmpd256_maskz : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def fixupimmps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, int>, _Constant int, unsigned char)">;
+  def fixupimmps128_maskz : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def fixupimmps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant int, unsigned char)">;
+  def fixupimmps256_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadapd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double const *>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadsd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double const *>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loadapd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double const *>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadaps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float const *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadss128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float const *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loadaps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float const *>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loaddqudi128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int const *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loaddqudi256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int const *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loaddqusi128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int const *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loaddqusi256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int const *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadupd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double const *>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loadupd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double const *>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadups128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float const *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def loadups256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float const *>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def storedquhi512_mask : X86Builtin<"void(_Vector<32, short *>, _Vector<32, short>, unsigned int)">;
+  def storedquqi512_mask : X86Builtin<"void(_Vector<64, char *>, _Vector<64, char>, unsigned long long int)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storedquhi128_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storedquhi256_mask : X86Builtin<"void(_Vector<16, short *>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storedquqi128_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storedquqi256_mask : X86Builtin<"void(_Vector<32, char *>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storeapd128_mask : X86Builtin<"void(_Vector<2, double *>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storesd128_mask : X86Builtin<"void(_Vector<2, double *>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storeapd256_mask : X86Builtin<"void(_Vector<4, double *>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storeaps128_mask : X86Builtin<"void(_Vector<4, float *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storess128_mask : X86Builtin<"void(_Vector<4, float *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storeaps256_mask : X86Builtin<"void(_Vector<8, float *>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storedqudi128_mask : X86Builtin<"void(_Vector<2, long long int *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storedqudi256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storedqusi128_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storedqusi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storeupd128_mask : X86Builtin<"void(_Vector<2, double *>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storeupd256_mask : X86Builtin<"void(_Vector<4, double *>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def storeups128_mask : X86Builtin<"void(_Vector<4, float *>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def storeups256_mask : X86Builtin<"void(_Vector<8, float *>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rcp14pd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rcp14pd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rcp14ps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rcp14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vplzcntd_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vplzcntd_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vplzcntq_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
+}
+
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vplzcntq_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtsd2si32 : X86Builtin<"int(_Vector<2, double>, _Constant int)">;
+  def vcvtsd2usi32 : X86Builtin<"unsigned int(_Vector<2, double>, _Constant int)">;
+  def vcvtss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
+  def vcvtss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">;
+  def vcvttsd2si32 : X86Builtin<"int(_Vector<2, double>, _Constant int)">;
+  def vcvttsd2usi32 : X86Builtin<"unsigned int(_Vector<2, double>, _Constant int)">;
+  def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
+  def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
+  def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
+  def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
+  def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rndscalesd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int, _Constant int)">;
+  def rndscaless_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def scalefpd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def scalefps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def scalefsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def scalefss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def psradi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
+  def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psraq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
+  def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
+  def psllv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
+  def psllv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+  def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
+  def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
+  def psrav16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
+  def psrav8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+  def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
+  def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
+  def psrlv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
+  def psrlv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+  def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
+  def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
+  def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
+  def pternlogq512_maskz : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pternlogd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
+  def pternlogd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pternlogd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
+  def pternlogd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pternlogq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
+  def pternlogq128_maskz : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pternlogq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
+  def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def shuf_f32x4 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+  def shuf_f64x2 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def shuf_i32x4 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+  def shuf_i64x2 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+  def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
+  def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def shuf_f32x4_256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def shuf_f64x2_256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def shuf_i32x4_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
+  def shuf_i64x2_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def sqrtsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def sqrtss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rsqrt14pd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rsqrt14pd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rsqrt14ps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">;
+  def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">;
+  def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">;
+}
+
+let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">;
+  def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">;
+  def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">;
+  def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtmask2b128 : X86Builtin<"_Vector<16, char>(unsigned short)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtmask2b256 : X86Builtin<"_Vector<32, char>(unsigned int)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtmask2w128 : X86Builtin<"_Vector<8, short>(unsigned char)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtmask2d128 : X86Builtin<"_Vector<4, int>(unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtmask2d256 : X86Builtin<"_Vector<8, int>(unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtmask2q128 : X86Builtin<"_Vector<2, long long int>(unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovsdb512_mask : X86Builtin<"_Vector<16, char>(_Vector<16, int>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovsdb512mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovswb512mem_mask : X86Builtin<"void(_Vector<32, char *>, _Vector<32, short>, unsigned int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovsdw512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, int>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovsdw512mem_mask : X86Builtin<"void(_Vector<16, short *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovsqb512_mask : X86Builtin<"_Vector<16, char>(_Vector<8, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovsqb512mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovsqd512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, long long int>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovsqd512mem_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovsqw512_mask : X86Builtin<"_Vector<8, short>(_Vector<8, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovsqw512mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovsdb128_mask : X86Builtin<"_Vector<16, char>(_Vector<4, int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovsdb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovswb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovsdb256_mask : X86Builtin<"_Vector<16, char>(_Vector<8, int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovsdb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovswb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // pmovs{dw,qb,qd,qw} 128/256-bit forms, all gated on avx512vl: value-returning *_mask defs carry Const, void *mem_mask defs do not.
+  def pmovsdw128_mask : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // NOTE(review): `_Vector<8, short *>` presumably encodes the legacy `V8s*` (pointer to vector), not a vector of pointers - confirm against the emitter.
+  def pmovsdw128mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovsdw256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovsdw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovsqb128_mask : X86Builtin<"_Vector<16, char>(_Vector<2, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovsqb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovsqb256_mask : X86Builtin<"_Vector<16, char>(_Vector<4, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovsqb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovsqd128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, long long int>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovsqd128mem_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovsqd256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, long long int>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovsqd256mem_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovsqw128_mask : X86Builtin<"_Vector<8, short>(_Vector<2, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovsqw128mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovsqw256_mask : X86Builtin<"_Vector<8, short>(_Vector<4, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovsqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // pmovus* 512-bit forms: the mask operand is unsigned short for 16-element sources and unsigned char for 8-element sources.
+  def pmovusdb512_mask : X86Builtin<"_Vector<16, char>(_Vector<16, int>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovusdb512mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { // Only def in this run gated on avx512bw; its 32-element source takes an unsigned int mask.
+  def pmovuswb512mem_mask : X86Builtin<"void(_Vector<32, char *>, _Vector<32, short>, unsigned int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovusdw512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, int>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovusdw512mem_mask : X86Builtin<"void(_Vector<16, short *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovusqb512_mask : X86Builtin<"_Vector<16, char>(_Vector<8, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovusqb512mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovusqd512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, long long int>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovusqd512mem_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovusqw512_mask : X86Builtin<"_Vector<8, short>(_Vector<8, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovusqw512mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // pmovus* 128/256-bit forms: gated on avx512vl alone, except the pmovuswb*mem_mask defs which also list avx512bw.
+  def pmovusdb128_mask : X86Builtin<"_Vector<16, char>(_Vector<4, int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovusdb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // Word-source store form: additionally requires avx512bw.
+  def pmovuswb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovusdb256_mask : X86Builtin<"_Vector<16, char>(_Vector<8, int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovusdb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<256>] in { // Word-source store form: additionally requires avx512bw; 16-element source takes an unsigned short mask.
+  def pmovuswb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovusdw128_mask : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovusdw128mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovusdw256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovusdw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovusqb128_mask : X86Builtin<"_Vector<16, char>(_Vector<2, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovusqb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovusqb256_mask : X86Builtin<"_Vector<16, char>(_Vector<4, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovusqb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovusqd128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, long long int>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovusqd128mem_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovusqd256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, long long int>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovusqd256mem_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovusqw128_mask : X86Builtin<"_Vector<8, short>(_Vector<2, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovusqw128mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovusqw256_mask : X86Builtin<"_Vector<8, short>(_Vector<4, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovusqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // pmov{db,dw,qb,qd,qw} 512-bit forms: each has a Const value-returning *_mask def and a void *mem_mask def without Const.
+  def pmovdb512_mask : X86Builtin<"_Vector<16, char>(_Vector<16, int>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovdb512mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { // Gated on avx512bw rather than avx512f; only the store form of pmovwb512 appears in this run.
+  def pmovwb512mem_mask : X86Builtin<"void(_Vector<32, char *>, _Vector<32, short>, unsigned int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovdw512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, int>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovdw512mem_mask : X86Builtin<"void(_Vector<16, short *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovqb512_mask : X86Builtin<"_Vector<16, char>(_Vector<8, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovqb512mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovqd512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, long long int>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovqd512mem_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pmovqw512_mask : X86Builtin<"_Vector<8, short>(_Vector<8, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def pmovqw512mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // pmov{db,dw,qb,qd,qw} 128/256-bit forms: gated on avx512vl alone, except the pmovwb*mem_mask defs which also list avx512bw.
+  def pmovdb128_mask : X86Builtin<"_Vector<16, char>(_Vector<4, int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<128>] in { // Word-source store form: additionally requires avx512bw.
+  def pmovwb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovdb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovdb256_mask : X86Builtin<"_Vector<16, char>(_Vector<8, int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovdb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, RequiredVectorWidth<256>] in { // Word-source store form: additionally requires avx512bw.
+  def pmovwb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovdw128_mask : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovdw128mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovdw256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovdw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovqb128_mask : X86Builtin<"_Vector<16, char>(_Vector<2, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovqb128mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovqb256_mask : X86Builtin<"_Vector<16, char>(_Vector<4, long long int>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovqb256mem_mask : X86Builtin<"void(_Vector<16, char *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovqd128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, long long int>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovqd128mem_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in { // No value-returning pmovqd256_mask def appears in this run. NOTE(review): presumably also absent from the original .def - confirm.
+  def pmovqd256mem_mask : X86Builtin<"void(_Vector<4, int *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def pmovqw128_mask : X86Builtin<"_Vector<8, short>(_Vector<2, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def pmovqw128mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def pmovqw256_mask : X86Builtin<"_Vector<8, short>(_Vector<4, long long int>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def pmovqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // extract*_mask defs share one shape: (wide vector, _Constant int, vector of the result type, unsigned char) -> narrower vector.
+  def extractf32x8_mask : X86Builtin<"_Vector<8, float>(_Vector<16, float>, _Constant int, _Vector<8, float>, unsigned char)">;
+  def extractf64x2_512_mask : X86Builtin<"_Vector<2, double>(_Vector<8, double>, _Constant int, _Vector<2, double>, unsigned char)">;
+  def extracti32x8_mask : X86Builtin<"_Vector<8, int>(_Vector<16, int>, _Constant int, _Vector<8, int>, unsigned char)">;
+  def extracti64x2_512_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 512-bit source forms that need only avx512f.
+  def extracti32x4_mask : X86Builtin<"_Vector<4, int>(_Vector<16, int>, _Constant int, _Vector<4, int>, unsigned char)">;
+  def extracti64x4_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, long long int>, _Constant int, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit source, 64x2 forms: avx512dq plus avx512vl.
+  def extractf64x2_256_mask : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int, _Vector<2, double>, unsigned char)">;
+  def extracti64x2_256_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 256-bit source, 32x4 forms: avx512vl only.
+  def extractf32x4_256_mask : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int, _Vector<4, float>, unsigned char)">;
+  def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // insert* defs share one shape: (wide vector, narrower vector, _Constant int) -> wide vector; none takes a mask operand.
+  def insertf32x8 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<8, float>, _Constant int)">;
+  def insertf64x2_512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<2, double>, _Constant int)">;
+  def inserti32x8 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<8, int>, _Constant int)">;
+  def inserti64x2_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 64x4 forms into a 512-bit vector: avx512f only.
+  def insertf64x4 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<4, double>, _Constant int)">;
+  def inserti64x4 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 64x2 forms into a 256-bit vector: avx512dq plus avx512vl.
+  def insertf64x2_256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
+  def inserti64x2_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { // 32x4 forms into a 256-bit vector: avx512vl only.
+  def insertf32x4_256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
+  def inserti32x4_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // 32x4 forms into a 512-bit vector: avx512f only.
+  def insertf32x4 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<4, float>, _Constant int)">;
+  def inserti32x4 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>, _Constant int)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // getmant* 128/256-bit forms take (vector, _Constant int, vector, unsigned char); the 512-bit forms further down add a trailing _Constant int.
+  def getmantpd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def getmantpd256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def getmantps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def getmantps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // NOTE(review): the extra trailing _Constant int on these 512-bit forms is presumably a rounding/SAE control - confirm.
+  def getmantpd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
+  def getmantps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
+  def getexppd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char, _Constant int)">;
+  def getexpps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // vfmadd*/vfmsub* ss/sd defs: all eight take (three same-typed 128-bit vectors, unsigned char, _Constant int); gated on avx512f without evex512.
+  def vfmaddss3_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfmaddss3_maskz : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfmaddss3_mask3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfmaddsd3_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def vfmaddsd3_maskz : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def vfmaddsd3_mask3 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def vfmsubsd3_mask3 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def vfmsubss3_mask3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // perm* defs: permdf512/permdi512 take a _Constant int second operand; the permvar* defs below take a second vector with integer elements and the same element count.
+  def permdf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
+  def permdi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // short-element forms are gated on avx512bw.
+  def permvarhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def permvardf512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
+  def permvardi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
+  def permvarsf512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
+  def permvarsi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx512vbmi,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // char-element forms are gated on avx512vbmi.
+  def permvarqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
+}
+
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def permvarqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+}
+
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def permvarqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def permvarhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def permvarhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def permvardf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
+  def permvardi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // fpclass*_mask defs: (vector, _Constant int, mask) -> mask of the same integer type; all require avx512dq.
+  def fpclasspd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def fpclasspd256_mask : X86Builtin<"unsigned char(_Vector<4, double>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def fpclassps128_mask : X86Builtin<"unsigned char(_Vector<4, float>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def fpclassps256_mask : X86Builtin<"unsigned char(_Vector<8, float>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // The 16-element float form is the only one here using an unsigned short mask.
+  def fpclassps512_mask : X86Builtin<"unsigned short(_Vector<16, float>, _Constant int, unsigned short)">;
+  def fpclasspd512_mask : X86Builtin<"unsigned char(_Vector<8, double>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // sd/ss forms: avx512dq alone, with 128-bit vector operands.
+  def fpclasssd_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Constant int, unsigned char)">;
+  def fpclassss_mask : X86Builtin<"unsigned char(_Vector<4, float>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in { // kadd* defs: qi/hi (unsigned char/short) are both gated on avx512dq; no RequiredVectorWidth attribute since operands are scalar integers.
+  def kaddqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
+  def kaddhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in { // si/di (unsigned int / unsigned long long int) forms are gated on avx512bw.
+  def kaddsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def kadddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in { // kand*/kandn*/kor* defs follow one gating pattern: qi -> avx512dq, hi -> avx512f, si/di -> avx512bw.
+  def kandqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def kandhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kandsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def kanddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+  def kandnqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def kandnhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kandnsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def kandndi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+  def korqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def korhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def korsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def kordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in { // kortest*/ktest* defs: each takes two same-typed mask integers and returns int.
+  def kortestcqi : X86Builtin<"int(unsigned char, unsigned char)">;
+  def kortestzqi : X86Builtin<"int(unsigned char, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in { // kortest hi forms are gated on avx512f.
+  def kortestchi : X86Builtin<"int(unsigned short, unsigned short)">;
+  def kortestzhi : X86Builtin<"int(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kortestcsi : X86Builtin<"int(unsigned int, unsigned int)">;
+  def kortestzsi : X86Builtin<"int(unsigned int, unsigned int)">;
+  def kortestcdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
+  def kortestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in { // Unlike kortest*hi (avx512f), ktestchi/ktestzhi sit under avx512dq with the qi forms. NOTE(review): presumably mirrors the original .def - confirm.
+  def ktestcqi : X86Builtin<"int(unsigned char, unsigned char)">;
+  def ktestzqi : X86Builtin<"int(unsigned char, unsigned char)">;
+  def ktestchi : X86Builtin<"int(unsigned short, unsigned short)">;
+  def ktestzhi : X86Builtin<"int(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def ktestcsi : X86Builtin<"int(unsigned int, unsigned int)">;
+  def ktestzsi : X86Builtin<"int(unsigned int, unsigned int)">;
+  def ktestcdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
+  def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in { // Only the hi (unsigned short) kunpck form is declared in this run.
+  def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in { // kxnor*/kxor* defs use the gating pattern qi -> avx512dq, hi -> avx512f, si/di -> avx512bw.
+  def kxnorqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def kxnorhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kxnorsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def kxnordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+  def kxorqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def kxorhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kxorsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
+  def kxordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in { // kshiftli*/kshiftri* defs: (mask integer, _Constant unsigned int) -> same mask type; gating is qi -> avx512dq, hi -> avx512f, si/di -> avx512bw.
+  def kshiftliqi : X86Builtin<"unsigned char(unsigned char, _Constant unsigned int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def kshiftlihi : X86Builtin<"unsigned short(unsigned short, _Constant unsigned int)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kshiftlisi : X86Builtin<"unsigned int(unsigned int, _Constant unsigned int)">;
+  def kshiftlidi : X86Builtin<"unsigned long long int(unsigned long long int, _Constant unsigned int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+  def kshiftriqi : X86Builtin<"unsigned char(unsigned char, _Constant unsigned int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def kshiftrihi : X86Builtin<"unsigned short(unsigned short, _Constant unsigned int)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kshiftrisi : X86Builtin<"unsigned int(unsigned int, _Constant unsigned int)">;
+  def kshiftridi : X86Builtin<"unsigned long long int(unsigned long long int, _Constant unsigned int)">;
+}
+
+let Features = "avx512dq", Attributes = [NoThrow, Const] in { // kmov{b,w,d,q} defs: unary, same integer type in and out; b -> avx512dq, w -> avx512f, d/q -> avx512bw.
+  def kmovb : X86Builtin<"unsigned char(unsigned char)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const] in {
+  def kmovw : X86Builtin<"unsigned short(unsigned short)">;
+}
+
+let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+  def kmovd : X86Builtin<"unsigned int(unsigned int)">;
+  def kmovq : X86Builtin<"unsigned long long int(unsigned long long int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // Byte-vector avx512bw builtins; palignr512 and the dbpsadbw* defs take a trailing _Constant int.
+  def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { // dbpsadbw*: two char vectors in, a short vector with half as many elements out.
+  def dbpsadbw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>, _Constant int)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def dbpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { // psadbw512 has no _Constant operand and returns eight long long int elements.
+  def dbpsadbw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>, _Constant int)">;
+  def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, _Vector<64, char>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def compressdf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
+  def compressdi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def compresshi512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, unsigned int)">;
+  def compressqi512_mask : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def compresssf512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, unsigned short)">;
+  def compresssi512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cmpsd_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char, _Constant int)">;
+  def cmpss_mask : X86Builtin<"unsigned char(_Vector<4, float>, _Vector<4, float>, _Constant int, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
+  def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
+  def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def expandhi512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, unsigned int)">;
+  def expandqi512_mask : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def expandloaddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double const *>, _Vector<8, double>, unsigned char)">;
+  def expandloaddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int const *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def expandloadhi512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short const *>, _Vector<32, short>, unsigned int)">;
+  def expandloadqi512_mask : X86Builtin<"_Vector<64, char>(_Vector<64, char const *>, _Vector<64, char>, unsigned long long int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def expandloadsf512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float const *>, _Vector<16, float>, unsigned short)">;
+  def expandloadsi512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int const *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def expandsf512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, unsigned short)">;
+  def expandsi512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, unsigned short)">;
+  def cvtps2pd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, float>, _Vector<8, double>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def compressstoredf512_mask : X86Builtin<"void(_Vector<8, double *>, _Vector<8, double>, unsigned char)">;
+  def compressstoredi512_mask : X86Builtin<"void(_Vector<8, long long int *>, _Vector<8, long long int>, unsigned char)">;
+}
+
+let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def compressstorehi512_mask : X86Builtin<"void(_Vector<32, short *>, _Vector<32, short>, unsigned int)">;
+  def compressstoreqi512_mask : X86Builtin<"void(_Vector<64, char *>, _Vector<64, char>, unsigned long long int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def compressstoresf512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">;
+  def compressstoresi512_mask : X86Builtin<"void(_Vector<16, int *>, _Vector<16, int>, unsigned short)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<8, short>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, short>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtps2ph_mask : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
+  def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
+  def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512vbmi,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
+}
+
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+}
+
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+}
+
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtne2ps2bf16_128 : X86Builtin<"_Vector<8, __bf16>(_Vector<4, float>, _Vector<4, float>)">;
+}
+
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtne2ps2bf16_256 : X86Builtin<"_Vector<16, __bf16>(_Vector<8, float>, _Vector<8, float>)">;
+}
+
+let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def cvtne2ps2bf16_512 : X86Builtin<"_Vector<32, __bf16>(_Vector<16, float>, _Vector<16, float>)">;
+}
+
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtneps2bf16_128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<4, float>, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cvtneps2bf16_256_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, float>, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def cvtneps2bf16_512_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, float>, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def dpbf16ps_128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def dpbf16ps_256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def dpbf16ps_512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx512bf16", Attributes = [NoThrow, Const] in {
+  def cvtsbf162ss_32 : X86Builtin<"float(__bf16)">;
+}
+
+let Features = "avx512vp2intersect,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vp2intersect_q_512 : X86Builtin<"void(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char *, unsigned char *)">;
+}
+
+let Features = "avx512vp2intersect,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vp2intersect_q_256 : X86Builtin<"void(_Vector<4, long long int>, _Vector<4, long long int>, unsigned char *, unsigned char *)">;
+}
+
+let Features = "avx512vp2intersect,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vp2intersect_q_128 : X86Builtin<"void(_Vector<2, long long int>, _Vector<2, long long int>, unsigned char *, unsigned char *)">;
+}
+
+let Features = "avx512vp2intersect,evex512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vp2intersect_d_512 : X86Builtin<"void(_Vector<16, int>, _Vector<16, int>, unsigned short *, unsigned short *)">;
+}
+
+let Features = "avx512vp2intersect,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vp2intersect_d_256 : X86Builtin<"void(_Vector<8, int>, _Vector<8, int>, unsigned char *, unsigned char *)">;
+}
+
+let Features = "avx512vp2intersect,avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vp2intersect_d_128 : X86Builtin<"void(_Vector<4, int>, _Vector<4, int>, unsigned char *, unsigned char *)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcomish : X86Builtin<"int(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, _Constant int)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def addph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
+  def subph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
+  def mulph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
+  def divph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
+  def maxph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
+  def minph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def minph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def minph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def maxph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def maxph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def addsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def divsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def mulsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def subsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def maxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def minsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def cmpph512_mask : X86Builtin<"unsigned int(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def cmpph256_mask : X86Builtin<"unsigned short(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cmpph128_mask : X86Builtin<"unsigned char(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cmpsh_mask : X86Builtin<"unsigned char(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadsh128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16 const *>, _Vector<8, _Float16>, unsigned char)">;
+  def storesh128_mask : X86Builtin<"void(_Vector<8, _Float16 *>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rcpph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rcpph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def rcpph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rsqrtph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rsqrtph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def rsqrtph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def getmantph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Constant int, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def getmantph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def getmantph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def getexpph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def getexpph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def getexpph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def scalefph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def scalefph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def scalefph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rndscaleph_128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Constant int, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def rndscaleph_256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def rndscaleph_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def reduceph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Constant int, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def reduceph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduceph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def rcpsh_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char)">;
+  def rsqrtsh_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char)">;
+  def getmantsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def getexpsh128_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def scalefsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def rndscalesh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int, _Constant int)">;
+  def reducesh_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def sqrtph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def sqrtph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def sqrtph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def sqrtsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def fpclassph128_mask : X86Builtin<"unsigned char(_Vector<8, _Float16>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def fpclassph256_mask : X86Builtin<"unsigned short(_Vector<16, _Float16>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def fpclassph512_mask : X86Builtin<"unsigned int(_Vector<32, _Float16>, _Constant int, unsigned int)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def fpclasssh_mask : X86Builtin<"unsigned char(_Vector<8, _Float16>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtpd2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<2, double>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtpd2ph256_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, double>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtpd2ph512_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, double>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2pd128_mask : X86Builtin<"_Vector<2, double>(_Vector<8, _Float16>, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2pd256_mask : X86Builtin<"_Vector<4, double>(_Vector<8, _Float16>, _Vector<4, double>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2pd512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, _Float16>, _Vector<8, double>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtsh2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<8, _Float16>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vcvtss2sh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<4, float>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtsd2sh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<2, double>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtsh2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<8, _Float16>, _Vector<2, double>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2w128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, _Float16>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2w256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, _Float16>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2w512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, _Float16>, _Vector<32, short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvttph2w128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, _Float16>, _Vector<8, short>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvttph2w256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, _Float16>, _Vector<16, short>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvttph2w512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, _Float16>, _Vector<32, short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtw2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, short>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtw2ph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, short>, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtw2ph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, short>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2uw128_mask : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, _Float16>, _Vector<8, unsigned short>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2uw256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2uw512_mask : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, _Float16>, _Vector<32, unsigned short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvttph2uw128_mask : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, _Float16>, _Vector<8, unsigned short>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvttph2uw256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvttph2uw512_mask : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, _Float16>, _Vector<32, unsigned short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtuw2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, unsigned short>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtuw2ph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, unsigned short>, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtuw2ph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, unsigned short>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<8, _Float16>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2dq256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, _Float16>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, _Float16>, _Vector<16, int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2udq128_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<8, _Float16>, _Vector<4, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2udq256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, _Float16>, _Vector<8, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2udq512_mask : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, _Float16>, _Vector<16, unsigned int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtdq2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtdq2ph256_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtdq2ph512_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, int>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtudq2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, unsigned int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtudq2ph256_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, unsigned int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtudq2ph512_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, unsigned int>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+}
+// vcvttph2dq{128,256,512}_mask: _Float16 lanes in, int lanes out; the 128- and 256-bit forms both read an 8 x _Float16 source.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvttph2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<8, _Float16>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvttph2dq256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, _Float16>, _Vector<8, int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvttph2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, _Float16>, _Vector<16, int>, unsigned short, _Constant int)">;
+}
+// vcvttph2udq{128,256,512}_mask: unsigned int counterpart of vcvttph2dq*_mask (identical shapes otherwise).
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvttph2udq128_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<8, _Float16>, _Vector<4, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvttph2udq256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, _Float16>, _Vector<8, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvttph2udq512_mask : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, _Float16>, _Vector<16, unsigned int>, unsigned short, _Constant int)">;
+}
+// vcvtqq2ph{128,256,512}_mask: long long lanes in; every width returns 8 x _Float16 and takes an unsigned char third argument.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtqq2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<2, long long int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtqq2ph256_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, long long int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtqq2ph512_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, long long int>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+}
+// vcvtph2qq{128,256,512}_mask: 8 x _Float16 in at every width, long long lanes out (2, 4 or 8 depending on width).
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2qq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, _Float16>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2qq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, _Float16>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2qq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, _Float16>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+}
+// vcvtuqq2ph{128,256,512}_mask: unsigned long long counterpart of vcvtqq2ph*_mask.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtuqq2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<2, unsigned long long int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtuqq2ph256_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, unsigned long long int>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtuqq2ph512_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, unsigned long long int>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+}
+// vcvtph2uqq{128,256,512}_mask: unsigned long long counterpart of vcvtph2qq*_mask.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2uqq128_mask : X86Builtin<"_Vector<2, unsigned long long int>(_Vector<8, _Float16>, _Vector<2, unsigned long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2uqq256_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<8, _Float16>, _Vector<4, unsigned long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2uqq512_mask : X86Builtin<"_Vector<8, unsigned long long int>(_Vector<8, _Float16>, _Vector<8, unsigned long long int>, unsigned char, _Constant int)">;
+}
+// vcvttph2qq{128,256,512}_mask: prototypes are identical to the vcvtph2qq*_mask defs of the same width.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvttph2qq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, _Float16>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvttph2qq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, _Float16>, _Vector<4, long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvttph2qq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, _Float16>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+}
+// vcvttph2uqq{128,256,512}_mask: prototypes are identical to the vcvtph2uqq*_mask defs of the same width.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvttph2uqq128_mask : X86Builtin<"_Vector<2, unsigned long long int>(_Vector<8, _Float16>, _Vector<2, unsigned long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvttph2uqq256_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<8, _Float16>, _Vector<4, unsigned long long int>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvttph2uqq512_mask : X86Builtin<"_Vector<8, unsigned long long int>(_Vector<8, _Float16>, _Vector<8, unsigned long long int>, unsigned char, _Constant int)">;
+}
+// 8 x _Float16 <-> 32-bit (unsigned) int builtins. Gated on avx512fp16 alone (no avx512vl/evex512); every def ends with a _Constant int.
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtsh2si32 : X86Builtin<"int(_Vector<8, _Float16>, _Constant int)">;
+  def vcvtsh2usi32 : X86Builtin<"unsigned int(_Vector<8, _Float16>, _Constant int)">;
+  def vcvtusi2sh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, unsigned int, _Constant int)">;
+  def vcvtsi2sh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, int, _Constant int)">;
+  def vcvttsh2si32 : X86Builtin<"int(_Vector<8, _Float16>, _Constant int)">;
+  def vcvttsh2usi32 : X86Builtin<"unsigned int(_Vector<8, _Float16>, _Constant int)">;
+}
+// vcvtph2psx{128,256,512}_mask: _Float16 lanes in, float lanes out; the 128- and 256-bit forms both read 8 x _Float16.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtph2psx128_mask : X86Builtin<"_Vector<4, float>(_Vector<8, _Float16>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtph2psx256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, _Float16>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtph2psx512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, _Float16>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+// vcvtps2phx{128,256,512}_mask: float lanes in, _Float16 lanes out; the 128- and 256-bit forms both return 8 x _Float16.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtps2phx128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, float>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvtps2phx256_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, float>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvtps2phx512_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, float>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+}
+// vfmaddph: the 128/256-bit defs are plain three-operand builtins; the 512-bit defs (_mask, _mask3, _maskz) add an unsigned int and a _Constant int.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfmaddph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfmaddph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vfmaddph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+  def vfmaddph512_mask3 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+  def vfmaddph512_maskz : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+// vfmaddsubph: same layout as vfmaddph. The 512-bit block also carries vfmsubaddph512_mask3 and vfmsubph512_mask3 with the same prototype.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfmaddsubph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfmaddsubph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vfmaddsubph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+  def vfmaddsubph512_maskz : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+  def vfmaddsubph512_mask3 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+  def vfmsubaddph512_mask3 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+  def vfmsubph512_mask3 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+// vfmaddsh3_{mask,maskz,mask3} and vfmsubsh3_mask3: three 8 x _Float16 operands, then unsigned char and _Constant int; gated on avx512fp16 alone.
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfmaddsh3_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vfmaddsh3_maskz : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vfmaddsh3_mask3 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vfmsubsh3_mask3 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+}
+// vfmaddcph*: operands are declared as float vectors (not _Float16) although the feature is avx512fp16 - presumably one float lane per complex half pair; confirm. Only the 512-bit block has a _mask3 def.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfmaddcph128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+  def vfmaddcph128_maskz : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfmaddcph256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char)">;
+  def vfmaddcph256_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vfmaddcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmaddcph512_maskz : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfmaddcph512_mask3 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+// vfcmaddcph*: prototypes mirror the vfmaddcph* defs one for one (float-vector operands, _mask3 only at 512 bits).
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfcmaddcph128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+  def vfcmaddcph128_maskz : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfcmaddcph256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char)">;
+  def vfcmaddcph256_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vfcmaddcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfcmaddcph512_maskz : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+  def vfcmaddcph512_mask3 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+// vf[c]maddcsh_* and vf[c]mulcsh_mask: all ten defs share one prototype (three 4 x float operands, unsigned char, _Constant int); gated on avx512fp16 alone.
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfmaddcsh_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfmaddcsh_maskz : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfcmaddcsh_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfcmaddcsh_maskz : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfmaddcsh_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfmaddcsh_round_mask3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfcmaddcsh_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfcmaddcsh_round_mask3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfmulcsh_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vfcmulcsh_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+}
+// vfmulcph{128,256,512}_mask: three float-vector operands plus an unsigned integer; only the 512-bit form adds a _Constant int.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfmulcph128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfmulcph256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vfmulcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+// vfcmulcph{128,256,512}_mask: prototypes mirror the vfmulcph*_mask defs of the same width.
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfcmulcph128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfcmulcph256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vfcmulcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+// selectb_{128,256,512}: (integer, vector, vector) -> vector over char lanes; the integer type widens (unsigned short/int/long long) with the lane count 16/32/64.
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectb_128 : X86Builtin<"_Vector<16, char>(unsigned short, _Vector<16, char>, _Vector<16, char>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectb_256 : X86Builtin<"_Vector<32, char>(unsigned int, _Vector<32, char>, _Vector<32, char>)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectb_512 : X86Builtin<"_Vector<64, char>(unsigned long long int, _Vector<64, char>, _Vector<64, char>)">;
+}
+// selectw_{128,256,512}: short lanes; the first argument is unsigned char/short/int for 8/16/32 lanes.
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectw_128 : X86Builtin<"_Vector<8, short>(unsigned char, _Vector<8, short>, _Vector<8, short>)">;
+}
+
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectw_256 : X86Builtin<"_Vector<16, short>(unsigned short, _Vector<16, short>, _Vector<16, short>)">;
+}
+
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectw_512 : X86Builtin<"_Vector<32, short>(unsigned int, _Vector<32, short>, _Vector<32, short>)">;
+}
+// selectd_{128,256,512}: int lanes. The 128/256-bit forms require only avx512vl; the 512-bit form requires avx512f,evex512.
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectd_128 : X86Builtin<"_Vector<4, int>(unsigned char, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectd_256 : X86Builtin<"_Vector<8, int>(unsigned char, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectd_512 : X86Builtin<"_Vector<16, int>(unsigned short, _Vector<16, int>, _Vector<16, int>)">;
+}
+// selectph_{128,256,512}: _Float16 lanes, gated on avx512fp16 (plus avx512vl or evex512 depending on width).
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectph_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectph_256 : X86Builtin<"_Vector<16, _Float16>(unsigned short, _Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectph_512 : X86Builtin<"_Vector<32, _Float16>(unsigned int, _Vector<32, _Float16>, _Vector<32, _Float16>)">;
+}
+// selectpbf_{128,256,512}: __bf16 lanes, gated on avx512bf16; lane counts and first-argument types match selectph_*.
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectpbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectpbf_256 : X86Builtin<"_Vector<16, __bf16>(unsigned short, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectpbf_512 : X86Builtin<"_Vector<32, __bf16>(unsigned int, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+// selectq_{128,256,512}: long long lanes; the first argument is unsigned char at every width (2, 4 or 8 lanes).
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectq_128 : X86Builtin<"_Vector<2, long long int>(unsigned char, _Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectq_256 : X86Builtin<"_Vector<4, long long int>(unsigned char, _Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectq_512 : X86Builtin<"_Vector<8, long long int>(unsigned char, _Vector<8, long long int>, _Vector<8, long long int>)">;
+}
+// selectps_{128,256,512}: float lanes; unsigned char first argument for 4/8 lanes, unsigned short for 16.
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectps_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectps_256 : X86Builtin<"_Vector<8, float>(unsigned char, _Vector<8, float>, _Vector<8, float>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectps_512 : X86Builtin<"_Vector<16, float>(unsigned short, _Vector<16, float>, _Vector<16, float>)">;
+}
+// selectpd_{128,256,512}: double lanes; the first argument is unsigned char at every width (2, 4 or 8 lanes).
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectpd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def selectpd_256 : X86Builtin<"_Vector<4, double>(unsigned char, _Vector<4, double>, _Vector<4, double>)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">;
+}
+// selectsh/selectsbf/selectss/selectsd_128: 128-bit only, unsigned char first argument; none of these blocks requires avx512vl.
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectsh_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
+  def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
+}
+// reduce_fadd_*: (scalar, vector) -> scalar of the element type. Blocks are ordered pd/ps 512, then ph 512, ph 256, ph 128.
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fadd_pd512 : X86Builtin<"double(double, _Vector<8, double>)">;
+  def reduce_fadd_ps512 : X86Builtin<"float(float, _Vector<16, float>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fadd_ph512 : X86Builtin<"_Float16(_Float16, _Vector<32, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def reduce_fadd_ph256 : X86Builtin<"_Float16(_Float16, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def reduce_fadd_ph128 : X86Builtin<"_Float16(_Float16, _Vector<8, _Float16>)">;
+}
+// reduce_fmax_*: vector -> scalar of the element type; unlike reduce_fadd_*/reduce_fmul_* there is no leading scalar argument.
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fmax_pd512 : X86Builtin<"double(_Vector<8, double>)">;
+  def reduce_fmax_ps512 : X86Builtin<"float(_Vector<16, float>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fmax_ph512 : X86Builtin<"_Float16(_Vector<32, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def reduce_fmax_ph256 : X86Builtin<"_Float16(_Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def reduce_fmax_ph128 : X86Builtin<"_Float16(_Vector<8, _Float16>)">;
+}
+// reduce_fmin_*: prototypes and feature gating match the reduce_fmax_* defs (vector in, scalar out).
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fmin_pd512 : X86Builtin<"double(_Vector<8, double>)">;
+  def reduce_fmin_ps512 : X86Builtin<"float(_Vector<16, float>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fmin_ph512 : X86Builtin<"_Float16(_Vector<32, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def reduce_fmin_ph256 : X86Builtin<"_Float16(_Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def reduce_fmin_ph128 : X86Builtin<"_Float16(_Vector<8, _Float16>)">;
+}
+// reduce_fmul_*: prototypes and feature gating match the reduce_fadd_* defs (leading scalar, then the vector).
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fmul_pd512 : X86Builtin<"double(double, _Vector<8, double>)">;
+  def reduce_fmul_ps512 : X86Builtin<"float(float, _Vector<16, float>)">;
+}
+
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def reduce_fmul_ph512 : X86Builtin<"_Float16(_Float16, _Vector<32, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def reduce_fmul_ph256 : X86Builtin<"_Float16(_Float16, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def reduce_fmul_ph128 : X86Builtin<"_Float16(_Float16, _Vector<8, _Float16>)">;
+}
+// mwaitx feature: monitorx and mwaitx. NoThrow only (no Const, no RequiredVectorWidth); both return void.
+let Features = "mwaitx", Attributes = [NoThrow] in {
+  def monitorx : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
+  def mwaitx : X86Builtin<"void(unsigned int, unsigned int, unsigned int)">;
+}
+// waitpkg feature: umonitor returns void; umwait and tpause share a prototype and return unsigned char.
+let Features = "waitpkg", Attributes = [NoThrow] in {
+  def umonitor : X86Builtin<"void(void const *)">;
+  def umwait : X86Builtin<"unsigned char(unsigned int, unsigned int, unsigned int)">;
+  def tpause : X86Builtin<"unsigned char(unsigned int, unsigned int, unsigned int)">;
+}
+// One-builtin feature blocks (clzero, cldemote, movdiri, movdir64b, ptwrite): each is NoThrow only and returns void.
+let Features = "clzero", Attributes = [NoThrow] in {
+  def clzero : X86Builtin<"void(void *)">;
+}
+
+let Features = "cldemote", Attributes = [NoThrow] in {
+  def cldemote : X86Builtin<"void(void const *)">;
+}
+
+let Features = "movdiri", Attributes = [NoThrow] in {
+  def directstore_u32 : X86Builtin<"void(unsigned int *, unsigned int)">;
+}
+
+let Features = "movdir64b", Attributes = [NoThrow] in {
+  def movdir64b : X86Builtin<"void(void *, void const *)">;
+}
+
+let Features = "ptwrite", Attributes = [NoThrow] in {
+  def ptwrite32 : X86Builtin<"void(unsigned int)">;
+}
+// NOTE(review): invpcid returns void and takes a void *, yet is marked Const; presumably carried over verbatim from the old .def entry - confirm this is intended.
+let Features = "invpcid", Attributes = [NoThrow, Const] in {
+  def invpcid : X86Builtin<"void(unsigned int, void *)">;
+}
+// enqcmd feature: enqcmd and enqcmds share the prototype unsigned char(void *, void const *).
+let Features = "enqcmd", Attributes = [NoThrow] in {
+  def enqcmd : X86Builtin<"unsigned char(void *, void const *)">;
+  def enqcmds : X86Builtin<"unsigned char(void *, void const *)">;
+}
+// kl feature. NOTE(review): the aes{enc,dec}{128,256}kl_u8 defs write '*' inside _Vector<...>; presumably this denotes a pointer to the vector rather than a vector of pointers - confirm against the prototype parser.
+let Features = "kl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadiwkey : X86Builtin<"void(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, unsigned int)">;
+  def encodekey128_u32 : X86Builtin<"unsigned int(unsigned int, _Vector<2, long long int>, void *)">;
+  def encodekey256_u32 : X86Builtin<"unsigned int(unsigned int, _Vector<2, long long int>, _Vector<2, long long int>, void *)">;
+  def aesenc128kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int>, void const *)">;
+  def aesenc256kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int>, void const *)">;
+  def aesdec128kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int>, void const *)">;
+  def aesdec256kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int>, void const *)">;
+}
+// aes{enc,dec}wide{128,256}kl_u8 require both kl and widekl; all four share one prototype, with '*' written inside _Vector<...> for the first two parameters.
+let Features = "kl,widekl", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def aesencwide128kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int const *>, void const *)">;
+  def aesencwide256kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int const *>, void const *)">;
+  def aesdecwide128kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int const *>, void const *)">;
+  def aesdecwide256kl_u8 : X86Builtin<"unsigned char(_Vector<2, long long int *>, _Vector<2, long long int const *>, void const *)">;
+}
+// serialize and tsxldtrk (xsusldtrk, xresldtrk): all three defs are void() and NoThrow only.
+let Features = "serialize", Attributes = [NoThrow] in {
+  def serialize : X86Builtin<"void()">;
+}
+
+let Features = "tsxldtrk", Attributes = [NoThrow] in {
+  def xsusldtrk : X86Builtin<"void()">;
+  def xresldtrk : X86Builtin<"void()">;
+}
+// raoint feature: aadd32, aand32, aor32 and axor32 all take (void *, signed int) and return void.
+let Features = "raoint", Attributes = [NoThrow] in {
+  def aadd32 : X86Builtin<"void(void *, signed int)">;
+  def aand32 : X86Builtin<"void(void *, signed int)">;
+  def aor32 : X86Builtin<"void(void *, signed int)">;
+  def axor32 : X86Builtin<"void(void *, signed int)">;
+}
+// intrin.h library builtins (X86LibBuiltin, Languages = "ALL_MS_LANGUAGES"). The let blocks differ only in attributes: Const is added for __emul/__emulu and NoReturn for __int2c/__ud2.
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def _BitScanForward : X86LibBuiltin<"unsigned char(msuint32_t *, msuint32_t)">;
+  def _BitScanReverse : X86LibBuiltin<"unsigned char(msuint32_t *, msuint32_t)">;
+  def _ReadWriteBarrier : X86LibBuiltin<"void()">;
+  def _ReadBarrier : X86LibBuiltin<"void()">;
+  def _WriteBarrier : X86LibBuiltin<"void()">;
+  def __cpuid : X86LibBuiltin<"void(int *, int)">;
+  def __cpuidex : X86LibBuiltin<"void(int *, int, int)">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, Const, RequireDeclaration] in {
+  def __emul : X86LibBuiltin<"long long int(int, int)">;
+  def __emulu : X86LibBuiltin<"unsigned long long int(unsigned int, unsigned int)">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def _AddressOfReturnAddress : X86LibBuiltin<"void *()">;
+  def __stosb : X86LibBuiltin<"void(unsigned char *, unsigned char, size_t)">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration, NoReturn] in {
+  def __int2c : X86LibBuiltin<"void()">;
+  def __ud2 : X86LibBuiltin<"void()">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def __readfsbyte : X86LibBuiltin<"unsigned char(msuint32_t)">;
+  def __readfsword : X86LibBuiltin<"unsigned short(msuint32_t)">;
+  def __readfsdword : X86LibBuiltin<"msuint32_t(msuint32_t)">;
+  def __readfsqword : X86LibBuiltin<"unsigned long long int(msuint32_t)">;
+  def __readgsbyte : X86LibBuiltin<"unsigned char(msuint32_t)">;
+  def __readgsword : X86LibBuiltin<"unsigned short(msuint32_t)">;
+  def __readgsdword : X86LibBuiltin<"msuint32_t(msuint32_t)">;
+  def __readgsqword : X86LibBuiltin<"unsigned long long int(msuint32_t)">;
+}
+// avx10.2 builtins: vdpphps at 128/256 bits (avx10.2-256) and 512 bits (avx10.2-512); the 512-bit block also holds the vpdpb{ss,su,uu}d[s]512 defs.
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vdpphps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vdpphps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vdpphps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<32, _Float16>, _Vector<32, _Float16>)">;
+  def vpdpbssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpbssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpbsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpbsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpbuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpbuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: six vpdpw* defs sharing one three-operand _Vector<16, int> prototype; this group has NoThrow but not Const.
+  def vpdpwsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpwsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpwusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpwusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpwuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+  def vpdpwuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {  // 512-bit width, Const: takes two 64 x char vectors and a _Constant char, returns 32 x short.
+  def mpsadbw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>, _Constant char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width, NoThrow only: the *_round / *_round_mask* defs; every prototype in this group ends with a trailing "_Constant int" parameter.
+  def vaddpd256_round : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def vaddph256_round : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int)">;
+  def vaddps256_round : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def vcmppd256_round_mask : X86Builtin<"unsigned char(_Vector<4, double>, _Vector<4, double>, _Constant int, unsigned char, _Constant int)">;
+  def vcmpph256_round_mask : X86Builtin<"unsigned short(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int, unsigned short, _Constant int)">;
+  def vcmpps256_round_mask : X86Builtin<"unsigned char(_Vector<8, float>, _Vector<8, float>, _Constant int, unsigned char, _Constant int)">;
+  def vcvtdq2ph256_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, int>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtdq2ps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, int>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vcvtpd2dq256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
+  def vcvtpd2ph256_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, double>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtpd2ps256_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, double>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vcvtpd2qq256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def vcvtpd2udq256_round_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, double>, _Vector<4, unsigned int>, unsigned char, _Constant int)">;
+  def vcvtpd2uqq256_round_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<4, double>, _Vector<4, unsigned long long int>, unsigned char, _Constant int)">;
+  def vcvtph2dq256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, _Float16>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def vcvtph2pd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<8, _Float16>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vcvtph2psx256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, _Float16>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vcvtph2qq256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, _Float16>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def vcvtph2udq256_round_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, _Float16>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvtph2uqq256_round_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<8, _Float16>, _Vector<4, unsigned long long int>, unsigned char, _Constant int)">;
+  def vcvtph2uw256_round_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+  def vcvtph2w256_round_mask : X86Builtin<"_Vector<16, short>(_Vector<16, _Float16>, _Vector<16, short>, unsigned short, _Constant int)">;
+  def vcvtps2dq256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def vcvtps2pd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, float>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vcvtps2phx256_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, float>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtps2qq256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def vcvtps2udq256_round_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvtps2uqq256_round_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<4, float>, _Vector<4, unsigned long long int>, unsigned char, _Constant int)">;
+  def vcvtqq2pd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, long long int>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vcvtqq2ph256_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, long long int>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtqq2ps256_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, long long int>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vcvttpd2dq256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
+  def vcvttpd2qq256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def vcvttpd2udq256_round_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, double>, _Vector<4, unsigned int>, unsigned char, _Constant int)">;
+  def vcvttpd2uqq256_round_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<4, double>, _Vector<4, unsigned long long int>, unsigned char, _Constant int)">;
+  def vcvttph2dq256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, _Float16>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def vcvttph2qq256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, _Float16>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def vcvttph2udq256_round_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, _Float16>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvttph2uqq256_round_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<8, _Float16>, _Vector<4, unsigned long long int>, unsigned char, _Constant int)">;
+  def vcvttph2uw256_round_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+  def vcvttph2w256_round_mask : X86Builtin<"_Vector<16, short>(_Vector<16, _Float16>, _Vector<16, short>, unsigned short, _Constant int)">;
+  def vcvttps2dq256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
+  def vcvttps2qq256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+  def vcvttps2udq256_round_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvttps2uqq256_round_mask : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<4, float>, _Vector<4, unsigned long long int>, unsigned char, _Constant int)">;
+  def vcvtudq2ph256_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, unsigned int>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtudq2ps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, unsigned int>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vcvtuqq2pd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, unsigned long long int>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vcvtuqq2ph256_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, unsigned long long int>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vcvtuqq2ps256_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, unsigned long long int>, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vcvtuw2ph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, unsigned short>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vcvtw2ph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, short>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vdivpd256_round : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def vdivph256_round : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int)">;
+  def vdivps256_round : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def vfcmaddcph256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfcmaddcph256_round_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfcmaddcph256_round_mask3 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfcmulcph256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfixupimmpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant int, unsigned char, _Constant int)">;
+  def vfixupimmpd256_round_maskz : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant int, unsigned char, _Constant int)">;
+  def vfixupimmps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant int, unsigned char, _Constant int)">;
+  def vfixupimmps256_round_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant int, unsigned char, _Constant int)">;
+  def vfmaddpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmaddpd256_round_maskz : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmaddpd256_round_mask3 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmaddph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmaddph256_round_maskz : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmaddph256_round_mask3 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmaddps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddps256_round_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddps256_round_mask3 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddcph256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddcph256_round_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddcph256_round_mask3 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddsubpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmaddsubpd256_round_maskz : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmaddsubpd256_round_mask3 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmaddsubph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmaddsubph256_round_maskz : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmaddsubph256_round_mask3 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmaddsubps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddsubps256_round_maskz : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmaddsubps256_round_mask3 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmsubpd256_round_mask3 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmsubph256_round_mask3 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmsubps256_round_mask3 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmsubaddpd256_round_mask3 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vfmsubaddph256_round_mask3 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vfmsubaddps256_round_mask3 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vfmulcph256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vgetexppd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vgetexpph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vgetexpps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vgetmantpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vgetmantph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vgetmantps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vmaxpd256_round : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def vmaxph256_round : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int)">;
+  def vmaxps256_round : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def vminpd256_round : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def vminph256_round : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int)">;
+  def vminps256_round : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def vmulpd256_round : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def vmulph256_round : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int)">;
+  def vmulps256_round : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+  def vrangepd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vrangeps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vreducepd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vreduceph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vreduceps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vrndscalepd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vrndscaleph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vrndscaleps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vscalefpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>, unsigned char, _Constant int)">;
+  def vscalefph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+  def vscalefps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>, unsigned char, _Constant int)">;
+  def vsqrtpd256_round : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+  def vsqrtph256_round : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Constant int)">;
+  def vsqrtps256_round : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
+  def vsubpd256_round : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
+  def vsubph256_round : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int)">;
+  def vsubps256_round : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Gated on "avxvnniint16|avx10.2-256", 128-bit width: three 4 x int operands, returns 4 x int.
+  def vpdpwsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // Gated on "avxvnniint16|avx10.2-256", 256-bit width: three 8 x int operands, returns 8 x int.
+  def vpdpwsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Gated on "avxvnniint16|avx10.2-256", 128-bit width: three 4 x int operands, returns 4 x int.
+  def vpdpwsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // Gated on "avxvnniint16|avx10.2-256", 256-bit width: three 8 x int operands, returns 8 x int.
+  def vpdpwsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Gated on "avxvnniint16|avx10.2-256", 128-bit width: three 4 x int operands, returns 4 x int.
+  def vpdpwusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // Gated on "avxvnniint16|avx10.2-256", 256-bit width: three 8 x int operands, returns 8 x int.
+  def vpdpwusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Gated on "avxvnniint16|avx10.2-256", 128-bit width: three 4 x int operands, returns 4 x int.
+  def vpdpwusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // Gated on "avxvnniint16|avx10.2-256", 256-bit width: three 8 x int operands, returns 8 x int.
+  def vpdpwusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Gated on "avxvnniint16|avx10.2-256", 128-bit width: three 4 x int operands, returns 4 x int.
+  def vpdpwuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // Gated on "avxvnniint16|avx10.2-256", 256-bit width: three 8 x int operands, returns 8 x int.
+  def vpdpwuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Gated on "avxvnniint16|avx10.2-256", 128-bit width: three 4 x int operands, returns 4 x int.
+  def vpdpwuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+}
+
+let Features = "avxvnniint16|avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // Gated on "avxvnniint16|avx10.2-256", 256-bit width: three 8 x int operands, returns 8 x int.
+  def vpdpwuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {  // 128-bit width, Const: each def takes one vector (2 x double or 4 x float) plus a _Constant int and returns a scalar int or unsigned int.
+  def vcvttsd2sis32 : X86Builtin<"int(_Vector<2, double>, _Constant int)">;
+  def vcvttsd2usis32 : X86Builtin<"unsigned int(_Vector<2, double>, _Constant int)">;
+  def vcvttss2sis32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
+  def vcvttss2usis32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 2 x double, a 4 x int vector and an unsigned char; no trailing _Constant int.
+  def vcvttpd2dqs128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 4 x double, a 4 x int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2dqs256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 8 x double, an 8 x int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2dqs512_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 2 x double, a 4 x int vector and an unsigned char; no trailing _Constant int.
+  def vcvttpd2udqs128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 4 x double, a 4 x int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2udqs256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 8 x double, an 8 x int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2udqs512_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 2 x double, a 2 x long long int vector and an unsigned char; no trailing _Constant int.
+  def vcvttpd2qqs128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 4 x double, a 4 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2qqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 8 x double, an 8 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2qqs512_round_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, double>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 2 x double, a 2 x long long int vector and an unsigned char; no trailing _Constant int.
+  def vcvttpd2uqqs128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 4 x double, a 4 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2uqqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 8 x double, an 8 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttpd2uqqs512_round_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, double>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 4 x float, a 4 x int vector and an unsigned char; no trailing _Constant int.
+  def vcvttps2dqs128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, float>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 8 x float, an 8 x int vector, an unsigned char and a trailing _Constant int.
+  def vcvttps2dqs256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 16 x float, a 16 x int vector, an unsigned short (not unsigned char) and a trailing _Constant int.
+  def vcvttps2dqs512_round_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 4 x float, a 4 x int vector and an unsigned char; no trailing _Constant int.
+  def vcvttps2udqs128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, float>, _Vector<4, int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 8 x float, an 8 x int vector, an unsigned char and a trailing _Constant int.
+  def vcvttps2udqs256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 16 x float, a 16 x int vector, an unsigned short (not unsigned char) and a trailing _Constant int.
+  def vcvttps2udqs512_round_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 4 x float, a 2 x long long int vector and an unsigned char; no trailing _Constant int.
+  def vcvttps2qqs128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, float>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 4 x float, a 4 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttps2qqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 8 x float, an 8 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttps2qqs512_round_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, float>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes 4 x float, a 2 x long long int vector and an unsigned char; no trailing _Constant int.
+  def vcvttps2uqqs128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, float>, _Vector<2, long long int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes 4 x float, a 4 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttps2uqqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {  // 512-bit width: takes 8 x float, an 8 x long long int vector, an unsigned char and a trailing _Constant int.
+  def vcvttps2uqqs512_round_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, float>, _Vector<8, long long int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes a pointer to const __bf16, returns 4 x float.
+  def vbcstnebf162ps128 : X86Builtin<"_Vector<4, float>(__bf16 const *)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes a pointer to const __bf16, returns 8 x float.
+  def vbcstnebf162ps256 : X86Builtin<"_Vector<8, float>(__bf16 const *)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width: takes a pointer to const _Float16, returns 4 x float.
+  def vbcstnesh2ps128 : X86Builtin<"_Vector<4, float>(_Float16 const *)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width: takes a pointer to const _Float16, returns 8 x float.
+  def vbcstnesh2ps256 : X86Builtin<"_Vector<8, float>(_Float16 const *)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width, returns 4 x float. NOTE(review): "const *" is written inside the _Vector<8, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneebf162ps128 : X86Builtin<"_Vector<4, float>(_Vector<8, __bf16 const *>)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width, returns 8 x float. NOTE(review): "const *" is written inside the _Vector<16, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneebf162ps256 : X86Builtin<"_Vector<8, float>(_Vector<16, __bf16 const *>)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width, returns 4 x float. NOTE(review): "const *" is written inside the _Vector<8, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneeph2ps128 : X86Builtin<"_Vector<4, float>(_Vector<8, _Float16 const *>)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width, returns 8 x float. NOTE(review): "const *" is written inside the _Vector<16, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneeph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<16, _Float16 const *>)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width, returns 4 x float. NOTE(review): "const *" is written inside the _Vector<8, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneobf162ps128 : X86Builtin<"_Vector<4, float>(_Vector<8, __bf16 const *>)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width, returns 8 x float. NOTE(review): "const *" is written inside the _Vector<16, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneobf162ps256 : X86Builtin<"_Vector<8, float>(_Vector<16, __bf16 const *>)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // 128-bit width, returns 4 x float. NOTE(review): "const *" is written inside the _Vector<8, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneoph2ps128 : X86Builtin<"_Vector<4, float>(_Vector<8, _Float16 const *>)">;
+}
+
+let Features = "avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // 256-bit width, returns 8 x float. NOTE(review): "const *" is written inside the _Vector<16, ...> element type; confirm this spelling denotes the intended pointer parameter.
+  def vcvtneoph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<16, _Float16 const *>)">;
+}
+
+let Features = "avx512bf16,avx512vl|avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // Gated on "avx512bf16,avx512vl|avxneconvert", 128-bit width: takes 4 x float, returns 8 x __bf16.
+  def vcvtneps2bf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<4, float>)">;
+}
+
+let Features = "avx512bf16,avx512vl|avxneconvert", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // Gated on "avx512bf16,avx512vl|avxneconvert", 256-bit width: takes 8 x float, returns 8 x __bf16.
+  def vcvtneps2bf16256 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, float>)">;
+}
+
+let Features = "sha512", Attributes = [NoThrow, RequiredVectorWidth<256>] in {  // sha512, 256-bit width: all return 4 x unsigned long long int; vsha512msg1's second operand and vsha512rnds2's third operand are 2-element vectors, the other operands are 4-element.
+  def vsha512msg1 : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<4, unsigned long long int>, _Vector<2, unsigned long long int>)">;
+  def vsha512msg2 : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<4, unsigned long long int>, _Vector<4, unsigned long long int>)">;
+  def vsha512rnds2 : X86Builtin<"_Vector<4, unsigned long long int>(_Vector<4, unsigned long long int>, _Vector<4, unsigned long long int>, _Vector<2, unsigned long long int>)">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {  // intrin.h library builtins for ALL_MS_LANGUAGES on an int64_t volatile *: Increment/Decrement take only the pointer, the others also take an int64_t value; all return int64_t.
+  def _InterlockedAnd64 : X86LibBuiltin<"int64_t(int64_t volatile *, int64_t)">;
+  def _InterlockedDecrement64 : X86LibBuiltin<"int64_t(int64_t volatile *)">;
+  def _InterlockedExchange64 : X86LibBuiltin<"int64_t(int64_t volatile *, int64_t)">;
+  def _InterlockedExchangeAdd64 : X86LibBuiltin<"int64_t(int64_t volatile *, int64_t)">;
+  def _InterlockedExchangeSub64 : X86LibBuiltin<"int64_t(int64_t volatile *, int64_t)">;
+  def _InterlockedIncrement64 : X86LibBuiltin<"int64_t(int64_t volatile *)">;
+  def _InterlockedOr64 : X86LibBuiltin<"int64_t(int64_t volatile *, int64_t)">;
+  def _InterlockedXor64 : X86LibBuiltin<"int64_t(int64_t volatile *, int64_t)">;
+}
+
+let Features = "sm3", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // sm3, 128-bit width: each def takes three 4 x unsigned int vectors; vsm3rnds2 additionally takes a _Constant unsigned int.
+  def vsm3msg1 : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, unsigned int>, _Vector<4, unsigned int>, _Vector<4, unsigned int>)">;
+  def vsm3msg2 : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, unsigned int>, _Vector<4, unsigned int>, _Vector<4, unsigned int>)">;
+  def vsm3rnds2 : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, unsigned int>, _Vector<4, unsigned int>, _Vector<4, unsigned int>, _Constant unsigned int)">;
+}
+
+let Features = "sm4", Attributes = [NoThrow, RequiredVectorWidth<128>] in {  // sm4, 128-bit width: two 4 x unsigned int operands, returns 4 x unsigned int.
+  def vsm4key4128 : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, unsigned int>, _Vector<4, unsigned int>)">;
+}
+
+let Features = "sm4", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vsm4key4256 : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, unsigned int>, _Vector<8, unsigned int>)">;
+}
+
+let Features = "sm4", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vsm4rnds4128 : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, unsigned int>, _Vector<4, unsigned int>)">;
+}
+
+let Features = "sm4", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vsm4rnds4256 : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, unsigned int>, _Vector<8, unsigned int>)">;
+}
+
+let Features = "avx10.2-512,sm4", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vsm4key4512 : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, unsigned int>, _Vector<16, unsigned int>)">;
+  def vsm4rnds4512 : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, unsigned int>, _Vector<16, unsigned int>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vminmaxnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vminmaxnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vminmaxnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vminmaxpd128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int, _Vector<2, double>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vminmaxpd256_round_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int, _Vector<4, double>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vminmaxpd512_round_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vminmaxph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vminmaxph256_round_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Constant int, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vminmaxph512_round_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vminmaxps128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vminmaxps256_round_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int, _Vector<8, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vminmaxps512_round_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vminmaxsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int, _Vector<2, double>, unsigned char, _Constant int)">;
+  def vminmaxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+  def vminmaxss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int, _Vector<4, float>, unsigned char, _Constant int)">;
+  def vcvtnebf162ibs128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtnebf162ibs256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtnebf162ibs512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtnebf162iubs128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtnebf162iubs256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtnebf162iubs512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtph2ibs128_mask : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, _Float16>, _Vector<8, unsigned short>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtph2ibs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtph2ibs512_mask : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, _Float16>, _Vector<32, unsigned short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtph2iubs128_mask : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, _Float16>, _Vector<8, unsigned short>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtph2iubs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtph2iubs512_mask : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, _Float16>, _Vector<32, unsigned short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtps2ibs128_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, float>, _Vector<4, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtps2ibs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtps2ibs512_mask : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, float>, _Vector<16, unsigned int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtps2iubs128_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, float>, _Vector<4, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtps2iubs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtps2iubs512_mask : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, float>, _Vector<16, unsigned int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvttnebf162ibs128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvttnebf162ibs256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvttnebf162ibs512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvttnebf162iubs128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvttnebf162iubs256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvttnebf162iubs512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvttph2ibs128_mask : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, _Float16>, _Vector<8, unsigned short>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvttph2ibs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvttph2ibs512_mask : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, _Float16>, _Vector<32, unsigned short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvttph2iubs128_mask : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, _Float16>, _Vector<8, unsigned short>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvttph2iubs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvttph2iubs512_mask : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, _Float16>, _Vector<32, unsigned short>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvttps2ibs128_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, float>, _Vector<4, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvttps2ibs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvttps2ibs512_mask : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, float>, _Vector<16, unsigned int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvttps2iubs128_mask : X86Builtin<"_Vector<4, unsigned int>(_Vector<4, float>, _Vector<4, unsigned int>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvttps2iubs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvttps2iubs512_mask : X86Builtin<"_Vector<16, unsigned int>(_Vector<16, float>, _Vector<16, unsigned int>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvt2ps2phx128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<4, float>, _Vector<4, float>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcvt2ps2phx256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<8, float>, _Vector<8, float>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcvt2ps2phx512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<16, float>, _Vector<16, float>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtbiasph2bf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtbiasph2bf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<32, char>, _Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtbiasph2bf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<64, char>, _Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtbiasph2bf8s_128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtbiasph2bf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<32, char>, _Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtbiasph2bf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<64, char>, _Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtbiasph2hf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtbiasph2hf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<32, char>, _Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtbiasph2hf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<64, char>, _Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtbiasph2hf8s_128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtbiasph2hf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<32, char>, _Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtbiasph2hf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<64, char>, _Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtne2ph2bf8_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtne2ph2bf8_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtne2ph2bf8_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtne2ph2bf8s_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtne2ph2bf8s_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtne2ph2bf8s_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtne2ph2hf8_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtne2ph2hf8_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtne2ph2hf8_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtne2ph2hf8s_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtne2ph2hf8s_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtne2ph2hf8s_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvthf8_2ph128_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<16, char>, _Vector<8, _Float16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvthf8_2ph256_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, char>, _Vector<16, _Float16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvthf8_2ph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, char>, _Vector<32, _Float16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtneph2bf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtneph2bf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtneph2bf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtneph2bf8s_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtneph2bf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtneph2bf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtneph2hf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtneph2hf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtneph2hf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vcvtneph2hf8s_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vcvtneph2hf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vcvtneph2hf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def loadsbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16 const *>, _Vector<8, __bf16>, unsigned char)">;
+  def storesbf16128_mask : X86Builtin<"void(_Vector<8, __bf16 *>, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vaddnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vaddnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vaddnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vdivnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vdivnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vdivnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vmaxpbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vmaxpbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vmaxpbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vminpbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vminpbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vminpbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vmulnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vmulnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vmulnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vsubnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vsubnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vsubnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcomsbf16eq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomsbf16lt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomsbf16neq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomsbf16ge : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomsbf16gt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomsbf16le : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vcmppbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, _Vector<32, __bf16>, _Constant int, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vcmppbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, _Vector<16, __bf16>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcmppbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, _Vector<8, __bf16>, _Constant int, unsigned char)">;
+  def vfpclasspbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, _Constant int, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfpclasspbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, _Constant int, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vfpclasspbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, _Constant int, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vscalefpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vscalefpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vscalefpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vrcppbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vrcppbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vrcppbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vgetexppbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vgetexppbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vgetexppbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vrsqrtpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vrsqrtpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vrsqrtpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vreducenepbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vreducenepbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vreducenepbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vrndscalenepbf16_128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vrndscalenepbf16_256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vrndscalenepbf16_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vgetmantpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vgetmantpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vgetmantpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vsqrtnepbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vsqrtnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+  def vsqrtnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
+  def vfmaddnepbh512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vfmaddnepbh256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vfmaddnepbh128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
+}
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index a14fd2c4b224d8..556332dd4b217b 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -124,8 +124,6 @@ namespace clang {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
 #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-#include "clang/Basic/BuiltinsX86.def"
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
 #include "clang/Basic/BuiltinsX86.inc"
     FirstX86_64Builtin,
     LastX86CommonBuiltin = FirstX86_64Builtin - 1,
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 1b16888a0711b5..7e5a5c78aa6b58 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -24,14 +24,6 @@ namespace clang {
 namespace targets {
 
 static constexpr Builtin::Info BuiltinInfoX86[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
-#include "clang/Basic/BuiltinsX86.def"
-
 #define BUILTIN(ID, TYPE, ATTRS)                                               \
   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
index 6c3604adc92b99..b08b02d9dcecaa 100644
--- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
+++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
@@ -25,6 +25,7 @@ enum class BuiltinType {
   LibBuiltin,
   LangBuiltin,
   TargetBuiltin,
+  TargetLibBuiltin,
 };
 
 class PrototypeParser {
@@ -108,9 +109,15 @@ class PrototypeParser {
     } else if (T.consume_back("&")) {
       ParseType(T);
       Type += "&";
+    } else if (T.consume_front("long long")) {
+      Type += "O";
+      ParseType(T);
     } else if (T.consume_front("long")) {
       Type += "L";
       ParseType(T);
+    } else if (T.consume_front("signed")) {
+      Type += "S";
+      ParseType(T);
     } else if (T.consume_front("unsigned")) {
       Type += "U";
       ParseType(T);
@@ -155,6 +162,7 @@ class PrototypeParser {
                                .Case("__fp16", "h")
                                .Case("__int128_t", "LLLi")
                                .Case("_Float16", "x")
+                               .Case("__bf16", "y")
                                .Case("bool", "b")
                                .Case("char", "c")
                                .Case("constant_CFString", "F")
@@ -262,6 +270,9 @@ void EmitBuiltinDef(raw_ostream &OS, StringRef Substitution,
   case BuiltinType::TargetBuiltin:
     OS << "TARGET_BUILTIN";
     break;
+  case BuiltinType::TargetLibBuiltin:
+    OS << "TARGET_HEADER_BUILTIN";
+    break;
   }
 
   OS << "(" << Spelling;
@@ -279,6 +290,12 @@ void EmitBuiltinDef(raw_ostream &OS, StringRef Substitution,
     OS << ", " << Builtin->getValueAsString("Languages");
     break;
   }
+  case BuiltinType::TargetLibBuiltin: {
+    OS << ", ";
+    HeaderNameParser{Builtin}.Print(OS);
+    OS << ", " << Builtin->getValueAsString("Languages");
+    [[fallthrough]];
+  }
   case BuiltinType::TargetBuiltin:
     OS << ", \"" << Builtin->getValueAsString("Features") << "\"";
     break;
@@ -331,6 +348,8 @@ void EmitBuiltin(raw_ostream &OS, const Record *Builtin) {
         BT = BuiltinType::AtomicBuiltin;
       } else if (Builtin->isSubClassOf("LangBuiltin")) {
         BT = BuiltinType::LangBuiltin;
+      } else if (Builtin->isSubClassOf("TargetLibBuiltin")) {
+        BT = BuiltinType::TargetLibBuiltin;
       } else if (Builtin->isSubClassOf("TargetBuiltin")) {
         BT = BuiltinType::TargetBuiltin;
       } else if (Builtin->isSubClassOf("LibBuiltin")) {
@@ -367,6 +386,10 @@ void clang::EmitClangBuiltins(const RecordKeeper &Records, raw_ostream &OS) {
 #if defined(BUILTIN) && !defined(TARGET_BUILTIN)
 #  define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
+
+#if defined(BUILTIN) && !defined(TARGET_HEADER_BUILTIN)
+#  define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS)
+#endif
 )c++";
 
   // AtomicBuiltins are order dependent
@@ -390,5 +413,6 @@ void clang::EmitClangBuiltins(const RecordKeeper &Records, raw_ostream &OS) {
 #undef LIBBUILTIN
 #undef LANGBUILTIN
 #undef TARGET_BUILTIN
+#undef TARGET_HEADER_BUILTIN
 )c++";
 }

>From 37ecefb81179a9f721aad09582444910cafd0914 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Tue, 24 Dec 2024 08:41:49 +0000
Subject: [PATCH 02/10] Bulk port 64-bit x86 builtins to TableGen

This PR follows #120831 (the PR contains both commits; only review the
last commit here, as the other commit will be reviewed on the other PR).

Similar to that PR, this does a very mechanical port of X86 builtins to
TableGen. There is a *lot* of improvement available here to use TableGen
more effectively and collapse repeated structures. But those can now be
follow-up PRs that restructure *within* the `.td` file.

The current structure produces a file that exactly matches the original
X-macros except for the differences outlined in #120831:

- Horizontal whitespace
- `long long` types now use `long long` outside of OpenCL, but switch to
  `long` in OpenCL (if relevant at all).

Otherwise, only the order of builtins changes, and no tests regress.
---
 clang/include/clang/Basic/BuiltinsX86.td     |  17 +-
 clang/include/clang/Basic/BuiltinsX86Base.td |  28 ++
 clang/include/clang/Basic/BuiltinsX86_64.td  | 485 +++++++++++++++++++
 clang/include/clang/Basic/CMakeLists.txt     |   4 +
 clang/include/clang/Basic/TargetBuiltins.h   |   2 +-
 clang/lib/Basic/Targets/X86.cpp              |   2 +-
 6 files changed, 520 insertions(+), 18 deletions(-)
 create mode 100644 clang/include/clang/Basic/BuiltinsX86Base.td
 create mode 100644 clang/include/clang/Basic/BuiltinsX86_64.td

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index be2802f3908ff3..1f134ce24108ef 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -10,22 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-include "clang/Basic/BuiltinsBase.td"
-
-class X86Builtin<string prototype> : TargetBuiltin {
-  let Spellings = ["__builtin_ia32_" # NAME];
-  let Prototype = prototype;
-}
-
-class X86NoPrefixBuiltin<string prototype> : TargetBuiltin {
-  let Spellings = [NAME];
-  let Prototype = prototype;
-}
-
-class X86LibBuiltin<string prototype> : TargetLibBuiltin {
-  let Spellings = [NAME];
-  let Prototype = prototype;
-}
+include "clang/Basic/BuiltinsX86Base.td"
 
 def rdpmc : X86Builtin<"unsigned long long int(int)">;
 def rdtsc : X86Builtin<"unsigned long long int()">;
diff --git a/clang/include/clang/Basic/BuiltinsX86Base.td b/clang/include/clang/Basic/BuiltinsX86Base.td
new file mode 100644
index 00000000000000..b92b3f3fe0c77a
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsX86Base.td
@@ -0,0 +1,28 @@
+//===--- BuiltinsX86Base.td - X86 Builtin function classes ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific builtin function classes.
+//
+//===----------------------------------------------------------------------===//
+
+include "clang/Basic/BuiltinsBase.td"
+
+class X86Builtin<string prototype> : TargetBuiltin {
+  let Spellings = ["__builtin_ia32_" # NAME];
+  let Prototype = prototype;
+}
+
+class X86NoPrefixBuiltin<string prototype> : TargetBuiltin {
+  let Spellings = [NAME];
+  let Prototype = prototype;
+}
+
+class X86LibBuiltin<string prototype> : TargetLibBuiltin {
+  let Spellings = [NAME];
+  let Prototype = prototype;
+}
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td
new file mode 100644
index 00000000000000..a6c6ef80eac212
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsX86_64.td
@@ -0,0 +1,485 @@
+//===--- BuiltinsX86_64.td - X86-64 Builtin function database ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-64-specific builtin function database.
+//
+//===----------------------------------------------------------------------===//
+
+include "clang/Basic/BuiltinsX86Base.td"
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def _BitScanForward64 : X86LibBuiltin<"unsigned char(msuint32_t *, unsigned long long int)">;
+  def _BitScanReverse64 : X86LibBuiltin<"unsigned char(msuint32_t *, unsigned long long int)">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, Const, RequireDeclaration] in {
+  def __mulh : X86LibBuiltin<"long long int(long long int, long long int)">;
+  def __umulh : X86LibBuiltin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+  def _mul128 : X86LibBuiltin<"long long int(long long int, long long int, long long int *)">;
+  def _umul128 : X86LibBuiltin<"unsigned long long int(unsigned long long int, unsigned long long int, unsigned long long int *)">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def __faststorefence : X86LibBuiltin<"void()">;
+}
+
+let Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, Const, RequireDeclaration] in {
+  def __shiftleft128 : X86LibBuiltin<"unsigned long long int(unsigned long long int, unsigned long long int, unsigned char)">;
+  def __shiftright128 : X86LibBuiltin<"unsigned long long int(unsigned long long int, unsigned long long int, unsigned char)">;
+}
+
+let Features = "cx16", Header = "intrin.h", Languages = "ALL_MS_LANGUAGES", Attributes = [NoThrow, RequireDeclaration] in {
+  def _InterlockedCompareExchange128 : X86LibBuiltin<"unsigned char(long long int volatile *, long long int, long long int, long long int *)">;
+}
+
+let Attributes = [NoThrow] in {
+  def readeflags_u64 : X86Builtin<"unsigned long long int()">;
+  def writeeflags_u64 : X86Builtin<"void(unsigned long long int)">;
+}
+
+let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtss2si64 : X86Builtin<"long long int(_Vector<4, float>)">;
+  def cvttss2si64 : X86Builtin<"long long int(_Vector<4, float>)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def cvtsd2si64 : X86Builtin<"long long int(_Vector<2, double>)">;
+  def cvttsd2si64 : X86Builtin<"long long int(_Vector<2, double>)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow] in {
+  def movnti64 : X86Builtin<"void(long long int *, long long int)">;
+}
+
+let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vec_set_v2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, long long int, _Constant int)">;
+}
+
+let Features = "crc32", Attributes = [NoThrow, Const] in {
+  def crc32di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vec_ext_v4di : X86Builtin<"long long int(_Vector<4, long long int>, _Constant int)">;
+  def vec_set_v4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, long long int, _Constant int)">;
+}
+
+let Features = "fsgsbase", Attributes = [NoThrow] in {
+  def rdfsbase32 : X86Builtin<"unsigned int()">;
+  def rdfsbase64 : X86Builtin<"unsigned long long int()">;
+  def rdgsbase32 : X86Builtin<"unsigned int()">;
+  def rdgsbase64 : X86Builtin<"unsigned long long int()">;
+  def wrfsbase32 : X86Builtin<"void(unsigned int)">;
+  def wrfsbase64 : X86Builtin<"void(unsigned long long int)">;
+  def wrgsbase32 : X86Builtin<"void(unsigned int)">;
+  def wrgsbase64 : X86Builtin<"void(unsigned long long int)">;
+}
+
+let Features = "fxsr", Attributes = [NoThrow] in {
+  def fxrstor64 : X86Builtin<"void(void *)">;
+  def fxsave64 : X86Builtin<"void(void *)">;
+}
+
+let Features = "xsave", Attributes = [NoThrow] in {
+  def xsave64 : X86Builtin<"void(void *, unsigned long long int)">;
+  def xrstor64 : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "xsaveopt", Attributes = [NoThrow] in {
+  def xsaveopt64 : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "xsaves", Attributes = [NoThrow] in {
+  def xrstors64 : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "xsavec", Attributes = [NoThrow] in {
+  def xsavec64 : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "xsaves", Attributes = [NoThrow] in {
+  def xsaves64 : X86Builtin<"void(void *, unsigned long long int)">;
+}
+
+let Features = "shstk", Attributes = [NoThrow] in {
+  def incsspq : X86Builtin<"void(unsigned long long int)">;
+  def rdsspq : X86Builtin<"unsigned long long int(unsigned long long int)">;
+  def wrssq : X86Builtin<"void(unsigned long long int, void *)">;
+  def wrussq : X86Builtin<"void(unsigned long long int, void *)">;
+}
+
+let Attributes = [NoThrow, Constexpr] in {
+  def addcarryx_u64 : X86Builtin<"unsigned char(unsigned char, unsigned long long int, unsigned long long int, unsigned long long int *)">;
+  def subborrow_u64 : X86Builtin<"unsigned char(unsigned char, unsigned long long int, unsigned long long int, unsigned long long int *)">;
+}
+
+let Features = "rdrnd", Attributes = [NoThrow] in {
+  def rdrand64_step : X86Builtin<"unsigned int(unsigned long long int *)">;
+}
+
+let Features = "rdseed", Attributes = [NoThrow] in {
+  def rdseed64_step : X86Builtin<"unsigned int(unsigned long long int *)">;
+}
+
+let Features = "lzcnt", Attributes = [NoThrow, Const, Constexpr] in {
+  def lzcnt_u64 : X86Builtin<"unsigned long long int(unsigned long long int)">;
+}
+
+let Features = "bmi", Attributes = [NoThrow, Const, Constexpr] in {
+  def bextr_u64 : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Attributes = [NoThrow, Const, Constexpr] in {
+  def tzcnt_u64 : X86Builtin<"unsigned long long int(unsigned long long int)">;
+}
+
+let Features = "bmi2", Attributes = [NoThrow, Const, Constexpr] in {
+  def bzhi_di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+  def pdep_di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+  def pext_di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "tbm", Attributes = [NoThrow, Const, Constexpr] in {
+  def bextri_u64 : X86Builtin<"unsigned long long int(unsigned long long int, _Constant unsigned long long int)">;
+}
+
+let Features = "lwp", Attributes = [NoThrow] in {
+  def lwpins64 : X86Builtin<"unsigned char(unsigned long long int, unsigned int, _Constant unsigned int)">;
+  def lwpval64 : X86Builtin<"void(unsigned long long int, unsigned int, _Constant unsigned int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtsd2si64 : X86Builtin<"long long int(_Vector<2, double>, _Constant int)">;
+  def vcvtsd2usi64 : X86Builtin<"unsigned long long int(_Vector<2, double>, _Constant int)">;
+  def vcvtss2si64 : X86Builtin<"long long int(_Vector<4, float>, _Constant int)">;
+  def vcvtss2usi64 : X86Builtin<"unsigned long long int(_Vector<4, float>, _Constant int)">;
+  def vcvttsd2si64 : X86Builtin<"long long int(_Vector<2, double>, _Constant int)">;
+  def vcvttsd2usi64 : X86Builtin<"unsigned long long int(_Vector<2, double>, _Constant int)">;
+  def vcvttss2si64 : X86Builtin<"long long int(_Vector<4, float>, _Constant int)">;
+  def vcvttss2usi64 : X86Builtin<"unsigned long long int(_Vector<4, float>, _Constant int)">;
+  def cvtsi2sd64 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, long long int, _Constant int)">;
+  def cvtsi2ss64 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, long long int, _Constant int)">;
+  def cvtusi2sd64 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, unsigned long long int, _Constant int)">;
+  def cvtusi2ss64 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned long long int, _Constant int)">;
+}
+
+let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvtsh2si64 : X86Builtin<"long long int(_Vector<8, _Float16>, _Constant int)">;
+  def vcvtsh2usi64 : X86Builtin<"unsigned long long int(_Vector<8, _Float16>, _Constant int)">;
+  def vcvtusi642sh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, unsigned long long int, _Constant int)">;
+  def vcvtsi642sh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, long long int, _Constant int)">;
+  def vcvttsh2si64 : X86Builtin<"long long int(_Vector<8, _Float16>, _Constant int)">;
+  def vcvttsh2usi64 : X86Builtin<"unsigned long long int(_Vector<8, _Float16>, _Constant int)">;
+}
+
+let Features = "movdiri", Attributes = [NoThrow] in {
+  def directstore_u64 : X86Builtin<"void(unsigned long int *, unsigned long int)">;
+}
+
+let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vcvttsd2sis64 : X86Builtin<"long long int(_Vector<2, double>, _Constant int)">;
+  def vcvttsd2usis64 : X86Builtin<"unsigned long long int(_Vector<2, double>, _Constant int)">;
+  def vcvttss2sis64 : X86Builtin<"long long int(_Vector<4, float>, _Constant int)">;
+  def vcvttss2usis64 : X86Builtin<"unsigned long long int(_Vector<4, float>, _Constant int)">;
+}
+
+let Features = "uintr", Attributes = [NoThrow] in {
+  def clui : X86Builtin<"void()">;
+  def stui : X86Builtin<"void()">;
+  def testui : X86Builtin<"unsigned char()">;
+  def senduipi : X86Builtin<"void(uint64_t)">;
+}
+
+let Features = "usermsr", Attributes = [NoThrow] in {
+  def urdmsr : X86Builtin<"unsigned long long int(unsigned long long int)">;
+  def uwrmsr : X86Builtin<"void(unsigned long long int, unsigned long long int)">;
+}
+
+let Features = "amx-tile", Attributes = [NoThrow] in {
+  def tile_loadconfig_internal : X86Builtin<"void(void const *)">;
+  def tileloadd64_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, void const *, size_t)">;
+}
+
+let Features = "amx-movrs", Attributes = [NoThrow] in {
+  def tileloaddrs64_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, void const *, size_t)">;
+}
+
+let Features = "amx-tile", Attributes = [NoThrow] in {
+  def tileloaddt164_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, void const *, size_t)">;
+}
+
+let Features = "amx-movrs", Attributes = [NoThrow] in {
+  def tileloaddrst164_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, void const *, size_t)">;
+}
+
+let Features = "amx-int8", Attributes = [NoThrow] in {
+  def tdpbssd_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tdpbsud_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tdpbusd_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tdpbuud_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-tile", Attributes = [NoThrow] in {
+  def tilestored64_internal : X86Builtin<"void(unsigned short, unsigned short, void *, size_t, _Vector<256, int>)">;
+  def tilezero_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short)">;
+}
+
+let Features = "amx-bf16", Attributes = [NoThrow] in {
+  def tdpbf16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-fp16", Attributes = [NoThrow] in {
+  def tdpfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-complex", Attributes = [NoThrow] in {
+  def tcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz0_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz0rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz0t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz0rst1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz1rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz1t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz1rst1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
+}
+
+let Features = "amx-transpose", Attributes = [NoThrow] in {
+  def ttransposed_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
+}
+
+let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in {
+  def ttdpbf16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in {
+  def ttdpfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
+  def ttcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def ttcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tconjtcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
+}
+
+let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
+  def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+  def tcvtrowps2pbf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+  def tcvtrowps2pbf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+  def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+  def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+  def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+}
+
+let Features = "amx-tf32", Attributes = [NoThrow] in {
+  def tmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in {
+  def ttmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-fp8", Attributes = [NoThrow] in {
+  def tdpbf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tdpbhf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tdphbf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+  def tdphf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
+}
+
+let Features = "amx-tile", Attributes = [NoThrow] in {
+  def tile_loadconfig : X86Builtin<"void(void const *)">;
+  def tile_storeconfig : X86Builtin<"void(void const *)">;
+  def tilerelease : X86Builtin<"void()">;
+  def tilezero : X86Builtin<"void(unsigned char)">;
+}
+
+let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz0rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def t2rpntlvwz0rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def t2rpntlvwz1rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def t2rpntlvwz1rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+}
+
+let Features = "amx-movrs", Attributes = [NoThrow] in {
+  def tileloaddrs64 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def tileloaddrst164 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+}
+
+let Features = "amx-tile", Attributes = [NoThrow] in {
+  def tileloadd64 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def tileloaddt164 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def tilestored64 : X86Builtin<"void(_Constant unsigned char, void *, size_t)">;
+}
+
+let Features = "amx-int8", Attributes = [NoThrow] in {
+  def tdpbssd : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+  def tdpbsud : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+  def tdpbusd : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+  def tdpbuud : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-bf16", Attributes = [NoThrow] in {
+  def tdpbf16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "ptwrite", Attributes = [NoThrow] in {
+  def ptwrite64 : X86Builtin<"void(unsigned long long int)">;
+}
+
+let Features = "amx-complex", Attributes = [NoThrow] in {
+  def tcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+  def tcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-transpose", Attributes = [NoThrow] in {
+  def t2rpntlvwz0 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def t2rpntlvwz0t1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def t2rpntlvwz1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def t2rpntlvwz1t1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
+  def ttransposed : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in {
+  def ttdpbf16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in {
+  def ttdpfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
+  def ttcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+  def ttcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+  def tconjtcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+  def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
+  def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">;
+  def tcvtrowps2pbf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
+  def tcvtrowps2pbf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
+  def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
+  def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
+  def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">;
+}
+
+let Features = "amx-fp16", Attributes = [NoThrow] in {
+  def tdpfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-fp8", Attributes = [NoThrow] in {
+  def tdpbf8ps : X86Builtin<"void(_Constant unsigned char, unsigned _Constant char, unsigned _Constant char)">;
+  def tdpbhf8ps : X86Builtin<"void(_Constant unsigned char, unsigned _Constant char, unsigned _Constant char)">;
+  def tdphbf8ps : X86Builtin<"void(_Constant unsigned char, unsigned _Constant char, unsigned _Constant char)">;
+  def tdphf8ps : X86Builtin<"void(_Constant unsigned char, unsigned _Constant char, unsigned _Constant char)">;
+}
+
+let Features = "amx-tf32", Attributes = [NoThrow] in {
+  def tmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in {
+  def ttmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
+}
+
+let Features = "prefetchi", Attributes = [NoThrow, Const] in {
+  def prefetchi : X86Builtin<"void(void const *, unsigned int)">;
+}
+
+let Features = "cmpccxadd", Attributes = [NoThrow] in {
+  def cmpccxadd32 : X86Builtin<"signed int(void *, signed int, signed int, _Constant int)">;
+  def cmpccxadd64 : X86Builtin<"signed long long int(signed long long int *, signed long long int, signed long long int, _Constant int)">;
+}
+
+let Features = "raoint", Attributes = [NoThrow] in {
+  def aadd64 : X86Builtin<"void(void *, signed long long int)">;
+  def aand64 : X86Builtin<"void(void *, signed long long int)">;
+  def aor64 : X86Builtin<"void(void *, signed long long int)">;
+  def axor64 : X86Builtin<"void(void *, signed long long int)">;
+}
+
+let Features = "movrs", Attributes = [NoThrow] in {
+  def movrsqi : X86Builtin<"signed char(void const *)">;
+  def movrshi : X86Builtin<"signed short(void const *)">;
+  def movrssi : X86Builtin<"signed int(void const *)">;
+  def movrsdi : X86Builtin<"signed long long int(void const *)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vmovrsb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char const *>)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vmovrsb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char const *>)">;
+}
+
+let Features = "movrs,avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vmovrsb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char const *>)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vmovrsd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int const *>)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vmovrsd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int const *>)">;
+}
+
+let Features = "movrs,avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vmovrsd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int const *>)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vmovrsq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int const *>)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vmovrsq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int const *>)">;
+}
+
+let Features = "movrs,avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vmovrsq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int const *>)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+  def vmovrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short const *>)">;
+}
+
+let Features = "movrs,avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
+  def vmovrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short const *>)">;
+}
+
+let Features = "movrs,avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
+  def vmovrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short const *>)">;
+}
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 76ac3367e23a66..1ccc73892fe6e2 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -64,6 +64,10 @@ clang_tablegen(BuiltinsX86.inc -gen-clang-builtins
   SOURCE BuiltinsX86.td
   TARGET ClangBuiltinsX86)
 
+clang_tablegen(BuiltinsX86_64.inc -gen-clang-builtins
+  SOURCE BuiltinsX86_64.td
+  TARGET ClangBuiltinsX86_64)
+
 # ARM NEON and MVE
 clang_tablegen(arm_neon.inc -gen-arm-neon-sema
   SOURCE arm_neon.td
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 556332dd4b217b..914be3691ee812 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -128,7 +128,7 @@ namespace clang {
     FirstX86_64Builtin,
     LastX86CommonBuiltin = FirstX86_64Builtin - 1,
 #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-#include "clang/Basic/BuiltinsX86_64.def"
+#include "clang/Basic/BuiltinsX86_64.inc"
     LastTSBuiltin
   };
   }
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 7e5a5c78aa6b58..d2d92fb864c310 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -38,7 +38,7 @@ static constexpr Builtin::Info BuiltinInfoX86[] = {
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
-#include "clang/Basic/BuiltinsX86_64.def"
+#include "clang/Basic/BuiltinsX86_64.inc"
 };
 
 static const char *const GCCRegNames[] = {

>From 2f61189324a0c5db2238ae2e0f3a6f0b317ad0d2 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Sat, 21 Dec 2024 09:26:24 +0000
Subject: [PATCH 03/10] Factor common code for quoting a builtin name

This shows up in several places in order to match the quoting of other
uses of the same diagnostic. Handling it centrally simplifies the code
and reduces changes if the storage for builtin names changes.

This refactoring is extracted out of #120534 as requested in code
review.
---
 clang/include/clang/Basic/Builtins.h     |  3 +++
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  4 ++--
 clang/lib/AST/ExprConstant.cpp           | 15 +++++++--------
 clang/lib/Basic/Builtins.cpp             |  4 ++++
 4 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h
index e27d8ccce73664..63559d977ce6b6 100644
--- a/clang/include/clang/Basic/Builtins.h
+++ b/clang/include/clang/Basic/Builtins.h
@@ -102,6 +102,9 @@ class Context {
   /// e.g. "__builtin_abs".
   llvm::StringRef getName(unsigned ID) const { return getRecord(ID).Name; }
 
+  /// Return a quoted name for the specified builtin for use in diagnostics.
+  std::string getQuotedName(unsigned ID) const;
+
   /// Get the type descriptor string for the specified builtin.
   const char *getTypeString(unsigned ID) const { return getRecord(ID).Type; }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2ae91feb2d9e8e..ed18e1b24c323e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -154,7 +154,7 @@ static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
   if (S.getLangOpts().CPlusPlus11)
     S.CCEDiag(Loc, diag::note_constexpr_invalid_function)
         << /*isConstexpr=*/0 << /*isConstructor=*/0
-        << ("'" + S.getASTContext().BuiltinInfo.getName(ID) + "'").str();
+        << S.getASTContext().BuiltinInfo.getQuotedName(ID);
   else
     S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr);
 }
@@ -1948,7 +1948,7 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
                   !isOneByteCharacterType(PtrB.getType()))) {
     S.FFDiag(S.Current->getSource(OpPC),
              diag::note_constexpr_memcmp_unsupported)
-        << ("'" + ASTCtx.BuiltinInfo.getName(ID) + "'").str() << PtrA.getType()
+        << ASTCtx.BuiltinInfo.getQuotedName(ID) << PtrA.getType()
         << PtrB.getType();
     return false;
   }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 89c515e6392764..c667fddfbf8120 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -9858,7 +9858,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     if (Info.getLangOpts().CPlusPlus11)
       Info.CCEDiag(E, diag::note_constexpr_invalid_function)
           << /*isConstexpr*/ 0 << /*isConstructor*/ 0
-          << ("'" + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'").str();
+          << Info.Ctx.BuiltinInfo.getQuotedName(BuiltinOp);
     else
       Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
     [[fallthrough]];
@@ -9903,8 +9903,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     // FIXME: We can compare the bytes in the correct order.
     if (IsRawByte && !isOneByteCharacterType(CharTy)) {
       Info.FFDiag(E, diag::note_constexpr_memchr_unsupported)
-          << ("'" + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'").str()
-          << CharTy;
+          << Info.Ctx.BuiltinInfo.getQuotedName(BuiltinOp) << CharTy;
       return false;
     }
     // Figure out what value we're actually looking for (after converting to
@@ -9966,7 +9965,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     if (Info.getLangOpts().CPlusPlus11)
       Info.CCEDiag(E, diag::note_constexpr_invalid_function)
           << /*isConstexpr*/ 0 << /*isConstructor*/ 0
-          << ("'" + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'").str();
+          << Info.Ctx.BuiltinInfo.getQuotedName(BuiltinOp);
     else
       Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
     [[fallthrough]];
@@ -13241,7 +13240,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     if (Info.getLangOpts().CPlusPlus11)
       Info.CCEDiag(E, diag::note_constexpr_invalid_function)
           << /*isConstexpr*/ 0 << /*isConstructor*/ 0
-          << ("'" + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'").str();
+          << Info.Ctx.BuiltinInfo.getQuotedName(BuiltinOp);
     else
       Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
     [[fallthrough]];
@@ -13266,7 +13265,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     if (Info.getLangOpts().CPlusPlus11)
       Info.CCEDiag(E, diag::note_constexpr_invalid_function)
           << /*isConstexpr*/ 0 << /*isConstructor*/ 0
-          << ("'" + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'").str();
+          << Info.Ctx.BuiltinInfo.getQuotedName(BuiltinOp);
     else
       Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
     [[fallthrough]];
@@ -13321,8 +13320,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         !(isOneByteCharacterType(CharTy1) && isOneByteCharacterType(CharTy2))) {
       // FIXME: Consider using our bit_cast implementation to support this.
       Info.FFDiag(E, diag::note_constexpr_memcmp_unsupported)
-          << ("'" + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'").str()
-          << CharTy1 << CharTy2;
+          << Info.Ctx.BuiltinInfo.getQuotedName(BuiltinOp) << CharTy1
+          << CharTy2;
       return false;
     }
 
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 8dd1888db29883..588183788de322 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -163,6 +163,10 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
   }
 }
 
+std::string Builtin::Context::getQuotedName(unsigned ID) const {
+  return (llvm::Twine("'") + getName(ID) + "'").str();
+}
+
 unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const {
   const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V');
   if (!WidthPos)

>From e93952f8dcd2152f7c8e28ff7a7315eb2098aa87 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Sat, 14 Dec 2024 09:09:47 +0000
Subject: [PATCH 04/10] Reapply "Switch builtin strings to use string tables"
 (#118734)

This reverts commit ca79ff07d8ae7a0c2531bfdb1cb623e25e5bd486.

It also updates the original PR to use the newly added `StringTable`
abstraction for string tables, and simplifies the construction to build
the string table and info arrays separately. This should reduce any
`constexpr` compile time memory or CPU cost of the original PR while
significantly improving the APIs throughout.
---
 clang/include/clang/Basic/Builtins.h      | 197 ++++++++++++++++++----
 clang/include/clang/Basic/BuiltinsPPC.def |   1 +
 clang/include/clang/Basic/TargetInfo.h    |  11 +-
 clang/lib/Basic/Builtins.cpp              | 117 ++++++++-----
 clang/lib/Basic/Targets/AArch64.cpp       |  61 ++++---
 clang/lib/Basic/Targets/AArch64.h         |   3 +-
 clang/lib/Basic/Targets/AMDGPU.cpp        |  26 ++-
 clang/lib/Basic/Targets/AMDGPU.h          |   3 +-
 clang/lib/Basic/Targets/ARC.h             |   5 +-
 clang/lib/Basic/Targets/ARM.cpp           |  48 +++---
 clang/lib/Basic/Targets/ARM.h             |   3 +-
 clang/lib/Basic/Targets/AVR.h             |   5 +-
 clang/lib/Basic/Targets/BPF.cpp           |  22 ++-
 clang/lib/Basic/Targets/BPF.h             |   3 +-
 clang/lib/Basic/Targets/CSKY.cpp          |   4 -
 clang/lib/Basic/Targets/CSKY.h            |   5 +-
 clang/lib/Basic/Targets/DirectX.h         |   5 +-
 clang/lib/Basic/Targets/Hexagon.cpp       |  29 ++--
 clang/lib/Basic/Targets/Hexagon.h         |   3 +-
 clang/lib/Basic/Targets/Lanai.h           |   5 +-
 clang/lib/Basic/Targets/LoongArch.cpp     |  26 ++-
 clang/lib/Basic/Targets/LoongArch.h       |   3 +-
 clang/lib/Basic/Targets/M68k.cpp          |   5 +-
 clang/lib/Basic/Targets/M68k.h            |   3 +-
 clang/lib/Basic/Targets/MSP430.h          |   5 +-
 clang/lib/Basic/Targets/Mips.cpp          |  25 ++-
 clang/lib/Basic/Targets/Mips.h            |   3 +-
 clang/lib/Basic/Targets/NVPTX.cpp         |  29 ++--
 clang/lib/Basic/Targets/NVPTX.h           |   3 +-
 clang/lib/Basic/Targets/PNaCl.h           |   5 +-
 clang/lib/Basic/Targets/PPC.cpp           |  29 ++--
 clang/lib/Basic/Targets/PPC.h             |   3 +-
 clang/lib/Basic/Targets/RISCV.cpp         |  35 ++--
 clang/lib/Basic/Targets/RISCV.h           |   3 +-
 clang/lib/Basic/Targets/SPIR.cpp          |   5 +-
 clang/lib/Basic/Targets/SPIR.h            |   8 +-
 clang/lib/Basic/Targets/Sparc.h           |   5 +-
 clang/lib/Basic/Targets/SystemZ.cpp       |  26 ++-
 clang/lib/Basic/Targets/SystemZ.h         |   3 +-
 clang/lib/Basic/Targets/TCE.h             |   5 +-
 clang/lib/Basic/Targets/VE.cpp            |  22 ++-
 clang/lib/Basic/Targets/VE.h              |   3 +-
 clang/lib/Basic/Targets/WebAssembly.cpp   |  29 ++--
 clang/lib/Basic/Targets/WebAssembly.h     |   3 +-
 clang/lib/Basic/Targets/X86.cpp           |  56 +++---
 clang/lib/Basic/Targets/X86.h             |   6 +-
 clang/lib/Basic/Targets/XCore.cpp         |  25 ++-
 clang/lib/Basic/Targets/XCore.h           |   3 +-
 48 files changed, 633 insertions(+), 299 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h
index 63559d977ce6b6..8c6b604b0bfe0f 100644
--- a/clang/include/clang/Basic/Builtins.h
+++ b/clang/include/clang/Basic/Builtins.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringTable.h"
 #include <cstring>
 
 // VC++ defines 'alloca' as an object-like macro, which interferes with our
@@ -55,6 +56,7 @@ struct HeaderDesc {
 #undef HEADER
   } ID;
 
+  constexpr HeaderDesc() : ID() {}
   constexpr HeaderDesc(HeaderID ID) : ID(ID) {}
 
   const char *getName() const;
@@ -68,14 +70,132 @@ enum ID {
   FirstTSBuiltin
 };
 
+// The info used to represent each builtin.
 struct Info {
-  llvm::StringLiteral Name;
-  const char *Type, *Attributes;
-  const char *Features;
+  // Rather than store pointers to the string literals describing these four
+  // aspects of builtins, we store offsets into a common string table.
+  struct StrOffsets {
+    llvm::StringTable::Offset Name;
+    llvm::StringTable::Offset Type;
+    llvm::StringTable::Offset Attributes;
+    llvm::StringTable::Offset Features;
+  } Offsets;
+
   HeaderDesc Header;
   LanguageID Langs;
 };
 
+// A constexpr function to construct an infos array from X-macros.
+//
+// The input array uses the same data structure, but the offsets are actually
+// _lengths_ when input. This is all we can compute from the X-macro approach to
+// builtins. This function will convert these lengths into actual offsets to a
+// string table built up through sequentially appending strings with the given
+// lengths.
+template <size_t N>
+static constexpr std::array<Info, N> MakeInfos(std::array<Info, N> Infos) {
+  // Translate lengths to offsets. We start past the initial empty string at
+  // offset zero.
+  unsigned Offset = 1;
+  for (Info &I : Infos) {
+    Info::StrOffsets NewOffsets = {};
+    NewOffsets.Name = Offset;
+    Offset += I.Offsets.Name.value();
+    NewOffsets.Type = Offset;
+    Offset += I.Offsets.Type.value();
+    NewOffsets.Attributes = Offset;
+    Offset += I.Offsets.Attributes.value();
+    NewOffsets.Features = Offset;
+    Offset += I.Offsets.Features.value();
+    I.Offsets = NewOffsets;
+  }
+  return Infos;
+}
+
+// A detail macro used below to emit a string literal that, after string literal
+// concatenation, ends up triggering the `-Woverlength-strings` warning. While
+// the warning is useful in general to catch accidentally excessive strings,
+// here we are creating them intentionally.
+//
+// This relies on a subtle aspect of `_Pragma`: that the *diagnostic* ones don't
+// turn into actual tokens that would disrupt string literal concatenation.
+#ifdef __clang__
+#define CLANG_BUILTIN_DETAIL_STR_TABLE(S)                                      \
+  _Pragma("clang diagnostic push")                                             \
+      _Pragma("clang diagnostic ignored \"-Woverlength-strings\"")             \
+          S _Pragma("clang diagnostic pop")
+#else
+#define CLANG_BUILTIN_DETAIL_STR_TABLE(S) S
+#endif
+
+// We require string tables to start with an empty string so that a `0` offset
+// can always be used to refer to an empty string. To satisfy that when building
+// string tables with X-macros, we use this start macro prior to expanding the
+// X-macros.
+#define CLANG_BUILTIN_STR_TABLE_START CLANG_BUILTIN_DETAIL_STR_TABLE("\0")
+
+// A macro that can be used with `Builtins.def` and similar files as an X-macro
+// to add the string arguments to a builtin string table. This is typically the
+// target for the `BUILTIN`, `LANGBUILTIN`, or `LIBBUILTIN` macros in those
+// files.
+#define CLANG_BUILTIN_STR_TABLE(ID, TYPE, ATTRS)                               \
+  CLANG_BUILTIN_DETAIL_STR_TABLE(#ID "\0" TYPE "\0" ATTRS "\0" /*FEATURE*/ "\0")
+
+// A macro that can be used with target builtin `.def` and `.inc` files as an
+// X-macro to add the string arguments to a builtin string table. This is
+// typically the target for the `TARGET_BUILTIN` macro.
+#define CLANG_TARGET_BUILTIN_STR_TABLE(ID, TYPE, ATTRS, FEATURE)               \
+  CLANG_BUILTIN_DETAIL_STR_TABLE(#ID "\0" TYPE "\0" ATTRS "\0" FEATURE "\0")
+
+// A macro that can be used with target builtin `.def` and `.inc` files as an
+// X-macro to add the string arguments to a builtin string table. This is
+// typically the target for the `TARGET_HEADER_BUILTIN` macro. We can't delegate
+// to `TARGET_BUILTIN` because the `FEATURE` string changes position.
+#define CLANG_TARGET_HEADER_BUILTIN_STR_TABLE(ID, TYPE, ATTRS, HEADER, LANGS,  \
+                                              FEATURE)                         \
+  CLANG_BUILTIN_DETAIL_STR_TABLE(#ID "\0" TYPE "\0" ATTRS "\0" FEATURE "\0")
+
+// A detail macro used internally to compute the desired string table
+// `StrOffsets` struct for arguments to `MakeInfos`.
+#define CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS)                      \
+  Builtin::Info::StrOffsets {                                                  \
+    sizeof(#ID), sizeof(TYPE), sizeof(ATTRS), sizeof("")                       \
+  }
+
+// A detail macro used internally to compute the desired string table
+// `StrOffsets` struct for arguments to `MakeInfos`.
+#define CLANG_TARGET_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS, FEATURE)      \
+  Builtin::Info::StrOffsets {                                                  \
+    sizeof(#ID), sizeof(TYPE), sizeof(ATTRS), sizeof(FEATURE)                  \
+  }
+
+// A set of macros that can be used with builtin `.def` files as an X-macro to
+// create an `Info` struct for a particular builtin. It both computes the
+// `StrOffsets` value for the string table (the lengths here, translated to
+// offsets by the `MakeInfos` function), and the other metadata for each
+// builtin.
+//
+// There is a corresponding macro for each of `BUILTIN`, `LANGBUILTIN`,
+// `LIBBUILTIN`, `TARGET_BUILTIN`, and `TARGET_HEADER_BUILTIN`.
+#define CLANG_BUILTIN_ENTRY(ID, TYPE, ATTRS)                                   \
+  Builtin::Info{CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS),             \
+                HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define CLANG_LANGBUILTIN_ENTRY(ID, TYPE, ATTRS, LANG)                         \
+  Builtin::Info{CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS),             \
+                HeaderDesc::NO_HEADER, LANG},
+#define CLANG_LIBBUILTIN_ENTRY(ID, TYPE, ATTRS, HEADER, LANG)                  \
+  Builtin::Info{CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS),             \
+                HeaderDesc::HEADER, LANG},
+#define CLANG_TARGET_BUILTIN_ENTRY(ID, TYPE, ATTRS, FEATURE)                   \
+  Builtin::Info{                                                               \
+      CLANG_TARGET_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS, FEATURE),       \
+      HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define CLANG_TARGET_HEADER_BUILTIN_ENTRY(ID, TYPE, ATTRS, HEADER, LANG,       \
+                                          FEATURE)                             \
+  Builtin::Info{                                                               \
+      CLANG_TARGET_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS, FEATURE),       \
+      HeaderDesc::HEADER, LANG},
+
 /// Holds information about both target-independent and
 /// target-specific builtins, allowing easy queries by clients.
 ///
@@ -83,8 +203,11 @@ struct Info {
 /// AuxTSRecords. Their IDs are shifted up by TSRecords.size() and need to
 /// be translated back with getAuxBuiltinID() before use.
 class Context {
-  llvm::ArrayRef<Info> TSRecords;
-  llvm::ArrayRef<Info> AuxTSRecords;
+  const llvm::StringTable *TSStrTable = nullptr;
+  const llvm::StringTable *AuxTSStrTable = nullptr;
+
+  llvm::ArrayRef<Info> TSInfos;
+  llvm::ArrayRef<Info> AuxTSInfos;
 
 public:
   Context() = default;
@@ -100,13 +223,16 @@ class Context {
 
   /// Return the identifier name for the specified builtin,
   /// e.g. "__builtin_abs".
-  llvm::StringRef getName(unsigned ID) const { return getRecord(ID).Name; }
+  llvm::StringRef getName(unsigned ID) const;
 
   /// Return a quoted name for the specified builtin for use in diagnostics.
   std::string getQuotedName(unsigned ID) const;
 
   /// Get the type descriptor string for the specified builtin.
-  const char *getTypeString(unsigned ID) const { return getRecord(ID).Type; }
+  const char *getTypeString(unsigned ID) const;
+
+  /// Get the attributes descriptor string for the specified builtin.
+  const char *getAttributesString(unsigned ID) const;
 
   /// Return true if this function is a target-specific builtin.
   bool isTSBuiltin(unsigned ID) const {
@@ -115,40 +241,40 @@ class Context {
 
   /// Return true if this function has no side effects.
   bool isPure(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'U') != nullptr;
+    return strchr(getAttributesString(ID), 'U') != nullptr;
   }
 
   /// Return true if this function has no side effects and doesn't
   /// read memory.
   bool isConst(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'c') != nullptr;
+    return strchr(getAttributesString(ID), 'c') != nullptr;
   }
 
   /// Return true if we know this builtin never throws an exception.
   bool isNoThrow(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'n') != nullptr;
+    return strchr(getAttributesString(ID), 'n') != nullptr;
   }
 
   /// Return true if we know this builtin never returns.
   bool isNoReturn(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'r') != nullptr;
+    return strchr(getAttributesString(ID), 'r') != nullptr;
   }
 
   /// Return true if we know this builtin can return twice.
   bool isReturnsTwice(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'j') != nullptr;
+    return strchr(getAttributesString(ID), 'j') != nullptr;
   }
 
   /// Returns true if this builtin does not perform the side-effects
   /// of its arguments.
   bool isUnevaluated(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'u') != nullptr;
+    return strchr(getAttributesString(ID), 'u') != nullptr;
   }
 
   /// Return true if this is a builtin for a libc/libm function,
   /// with a "__builtin_" prefix (e.g. __builtin_abs).
   bool isLibFunction(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'F') != nullptr;
+    return strchr(getAttributesString(ID), 'F') != nullptr;
   }
 
   /// Determines whether this builtin is a predefined libc/libm
@@ -159,21 +285,21 @@ class Context {
   /// they do not, but they are recognized as builtins once we see
   /// a declaration.
   bool isPredefinedLibFunction(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'f') != nullptr;
+    return strchr(getAttributesString(ID), 'f') != nullptr;
   }
 
   /// Returns true if this builtin requires appropriate header in other
   /// compilers. In Clang it will work even without including it, but we can emit
   /// a warning about missing header.
   bool isHeaderDependentFunction(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'h') != nullptr;
+    return strchr(getAttributesString(ID), 'h') != nullptr;
   }
 
   /// Determines whether this builtin is a predefined compiler-rt/libgcc
   /// function, such as "__clear_cache", where we know the signature a
   /// priori.
   bool isPredefinedRuntimeFunction(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'i') != nullptr;
+    return strchr(getAttributesString(ID), 'i') != nullptr;
   }
 
   /// Determines whether this builtin is a C++ standard library function
@@ -181,7 +307,7 @@ class Context {
   /// specialization, where the signature is determined by the standard library
   /// declaration.
   bool isInStdNamespace(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'z') != nullptr;
+    return strchr(getAttributesString(ID), 'z') != nullptr;
   }
 
   /// Determines whether this builtin can have its address taken with no
@@ -195,33 +321,33 @@ class Context {
 
   /// Determines whether this builtin has custom typechecking.
   bool hasCustomTypechecking(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 't') != nullptr;
+    return strchr(getAttributesString(ID), 't') != nullptr;
   }
 
   /// Determines whether a declaration of this builtin should be recognized
   /// even if the type doesn't match the specified signature.
   bool allowTypeMismatch(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'T') != nullptr ||
+    return strchr(getAttributesString(ID), 'T') != nullptr ||
            hasCustomTypechecking(ID);
   }
 
   /// Determines whether this builtin has a result or any arguments which
   /// are pointer types.
   bool hasPtrArgsOrResult(unsigned ID) const {
-    return strchr(getRecord(ID).Type, '*') != nullptr;
+    return strchr(getTypeString(ID), '*') != nullptr;
   }
 
   /// Return true if this builtin has a result or any arguments which are
   /// reference types.
   bool hasReferenceArgsOrResult(unsigned ID) const {
-    return strchr(getRecord(ID).Type, '&') != nullptr ||
-           strchr(getRecord(ID).Type, 'A') != nullptr;
+    return strchr(getTypeString(ID), '&') != nullptr ||
+           strchr(getTypeString(ID), 'A') != nullptr;
   }
 
   /// If this is a library function that comes from a specific
   /// header, retrieve that header name.
   const char *getHeaderName(unsigned ID) const {
-    return getRecord(ID).Header.getName();
+    return getInfo(ID).Header.getName();
   }
 
   /// Determine whether this builtin is like printf in its
@@ -246,27 +372,25 @@ class Context {
   /// Such functions can be const when the MathErrno lang option and FP
   /// exceptions are disabled.
   bool isConstWithoutErrnoAndExceptions(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'e') != nullptr;
+    return strchr(getAttributesString(ID), 'e') != nullptr;
   }
 
   bool isConstWithoutExceptions(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'g') != nullptr;
+    return strchr(getAttributesString(ID), 'g') != nullptr;
   }
 
-  const char *getRequiredFeatures(unsigned ID) const {
-    return getRecord(ID).Features;
-  }
+  const char *getRequiredFeatures(unsigned ID) const;
 
   unsigned getRequiredVectorWidth(unsigned ID) const;
 
   /// Return true if builtin ID belongs to AuxTarget.
   bool isAuxBuiltinID(unsigned ID) const {
-    return ID >= (Builtin::FirstTSBuiltin + TSRecords.size());
+    return ID >= (Builtin::FirstTSBuiltin + TSInfos.size());
   }
 
   /// Return real builtin ID (i.e. ID it would have during compilation
   /// for AuxTarget).
-  unsigned getAuxBuiltinID(unsigned ID) const { return ID - TSRecords.size(); }
+  unsigned getAuxBuiltinID(unsigned ID) const { return ID - TSInfos.size(); }
 
   /// Returns true if this is a libc/libm function without the '__builtin_'
   /// prefix.
@@ -278,16 +402,21 @@ class Context {
 
   /// Return true if this function can be constant evaluated by Clang frontend.
   bool isConstantEvaluated(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'E') != nullptr;
+    return strchr(getAttributesString(ID), 'E') != nullptr;
   }
 
   /// Returns true if this is an immediate (consteval) function
   bool isImmediate(unsigned ID) const {
-    return strchr(getRecord(ID).Attributes, 'G') != nullptr;
+    return strchr(getAttributesString(ID), 'G') != nullptr;
   }
 
 private:
-  const Info &getRecord(unsigned ID) const;
+  std::pair<const llvm::StringTable &, const Info &>
+  getStrTableAndInfo(unsigned ID) const;
+
+  const Info &getInfo(unsigned ID) const {
+    return getStrTableAndInfo(ID).second;
+  }
 
   /// Helper function for isPrintfLike and isScanfLike.
   bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg,
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 161df386f00f03..bb7d54bbb793eb 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1138,5 +1138,6 @@ UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true,
 // FIXME: Obviously incomplete.
 
 #undef BUILTIN
+#undef TARGET_BUILTIN
 #undef CUSTOM_BUILTIN
 #undef UNALIASED_CUSTOM_BUILTIN
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 82bd537b242c1c..0b9d18362d0a00 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -16,6 +16,7 @@
 
 #include "clang/Basic/AddressSpaces.h"
 #include "clang/Basic/BitmaskEnum.h"
+#include "clang/Basic/Builtins.h"
 #include "clang/Basic/CFProtectionOptions.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/LLVM.h"
@@ -32,6 +33,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringTable.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/DataTypes.h"
@@ -1009,10 +1011,11 @@ class TargetInfo : public TransferrableTargetInfo,
   virtual void getTargetDefines(const LangOptions &Opts,
                                 MacroBuilder &Builder) const = 0;
 
-  /// Return information about target-specific builtins for
-  /// the current primary target, and info about which builtins are non-portable
-  /// across the current set of primary and secondary targets.
-  virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;
+  /// Return information about target-specific builtins for the current primary
+  /// target, and info about which builtins are non-portable across the current
+  /// set of primary and secondary targets.
+  virtual std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const = 0;
 
   /// Returns target-specific min and max values VScale_Range.
   virtual std::optional<std::pair<unsigned, unsigned>>
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 588183788de322..58fd212f9ddf17 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -29,54 +29,91 @@ const char *HeaderDesc::getName() const {
   llvm_unreachable("Unknown HeaderDesc::HeaderID enum");
 }
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-    {"not a builtin function", nullptr, nullptr, nullptr, HeaderDesc::NO_HEADER,
-     ALL_LANGUAGES},
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LANGBUILTIN(ID, TYPE, ATTRS, LANGS)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANGS},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS)                             \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, LANGS},
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+        // We inject a non-builtin string into the table.
+        CLANG_BUILTIN_STR_TABLE("not a builtin function", "", "")
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
 #include "clang/Basic/Builtins.inc"
-};
+    ;
+
+static constexpr auto BuiltinInfos =
+    Builtin::MakeInfos<Builtin::FirstTSBuiltin>(
+        {CLANG_BUILTIN_ENTRY("not a builtin function", "", "")
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#include "clang/Basic/Builtins.inc"
+        });
 
-const Builtin::Info &Builtin::Context::getRecord(unsigned ID) const {
+std::pair<const llvm::StringTable &, const Builtin::Info &>
+Builtin::Context::getStrTableAndInfo(unsigned ID) const {
   if (ID < Builtin::FirstTSBuiltin)
-    return BuiltinInfo[ID];
-  assert(((ID - Builtin::FirstTSBuiltin) <
-          (TSRecords.size() + AuxTSRecords.size())) &&
-         "Invalid builtin ID!");
+    return {BuiltinStrings, BuiltinInfos[ID]};
+  assert(
+      ((ID - Builtin::FirstTSBuiltin) < (TSInfos.size() + AuxTSInfos.size())) &&
+      "Invalid builtin ID!");
   if (isAuxBuiltinID(ID))
-    return AuxTSRecords[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin];
-  return TSRecords[ID - Builtin::FirstTSBuiltin];
+    return {*AuxTSStrTable,
+            AuxTSInfos[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin]};
+  return {*TSStrTable, TSInfos[ID - Builtin::FirstTSBuiltin]};
+}
+
+/// Return the identifier name for the specified builtin,
+/// e.g. "__builtin_abs".
+llvm::StringRef Builtin::Context::getName(unsigned ID) const {
+  const auto &[StrTable, I] = getStrTableAndInfo(ID);
+  return StrTable[I.Offsets.Name];
+}
+
+const char *Builtin::Context::getTypeString(unsigned ID) const {
+  const auto &[StrTable, I] = getStrTableAndInfo(ID);
+  return StrTable[I.Offsets.Type].data();
+}
+
+const char *Builtin::Context::getAttributesString(unsigned ID) const {
+  const auto &[StrTable, I] = getStrTableAndInfo(ID);
+  return StrTable[I.Offsets.Attributes].data();
+}
+
+const char *Builtin::Context::getRequiredFeatures(unsigned ID) const {
+  const auto &[StrTable, I] = getStrTableAndInfo(ID);
+  return StrTable[I.Offsets.Features].data();
 }
 
 void Builtin::Context::InitializeTarget(const TargetInfo &Target,
                                         const TargetInfo *AuxTarget) {
-  assert(TSRecords.empty() && "Already initialized target?");
-  TSRecords = Target.getTargetBuiltins();
-  if (AuxTarget)
-    AuxTSRecords = AuxTarget->getTargetBuiltins();
+  assert(TSStrTable == nullptr && "Already initialized target?");
+  assert(TSInfos.empty() && "Already initialized target?");
+  std::tie(TSStrTable, TSInfos) = Target.getTargetBuiltinStorage();
+  if (AuxTarget) {
+    std::tie(AuxTSStrTable, AuxTSInfos) = AuxTarget->getTargetBuiltinStorage();
+  }
 }
 
 bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) {
   bool InStdNamespace = FuncName.consume_front("std-");
+  const llvm::StringTable &StrTable = BuiltinStrings;
   for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin;
        ++i) {
-    if (FuncName == BuiltinInfo[i].Name &&
-        (bool)strchr(BuiltinInfo[i].Attributes, 'z') == InStdNamespace)
-      return strchr(BuiltinInfo[i].Attributes, 'f') != nullptr;
+    const auto &I = BuiltinInfos[i];
+    if (FuncName == StrTable[I.Offsets.Name] &&
+        (bool)strchr(StrTable[I.Offsets.Attributes].data(), 'z') ==
+            InStdNamespace)
+      return strchr(StrTable[I.Offsets.Attributes].data(), 'f') != nullptr;
   }
 
   return false;
 }
 
 /// Is this builtin supported according to the given language options?
-static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
+static bool builtinIsSupported(const llvm::StringTable &StrTable,
+                               const Builtin::Info &BuiltinInfo,
                                const LangOptions &LangOpts) {
+  auto AttributesStr = StrTable[BuiltinInfo.Offsets.Attributes];
+
   /* Builtins Unsupported */
-  if (LangOpts.NoBuiltin && strchr(BuiltinInfo.Attributes, 'f') != nullptr)
+  if (LangOpts.NoBuiltin && strchr(AttributesStr.data(), 'f') != nullptr)
     return false;
   /* CorBuiltins Unsupported */
   if (!LangOpts.Coroutines && (BuiltinInfo.Langs & COR_LANG))
@@ -123,7 +160,7 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
   if (!LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG)
     return false;
   /* consteval Unsupported */
-  if (!LangOpts.CPlusPlus20 && strchr(BuiltinInfo.Attributes, 'G') != nullptr)
+  if (!LangOpts.CPlusPlus20 && strchr(AttributesStr.data(), 'G') != nullptr)
     return false;
   return true;
 }
@@ -134,20 +171,22 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
 void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
                                           const LangOptions& LangOpts) {
   // Step #1: mark all target-independent builtins with their ID's.
-  for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin; ++i)
-    if (builtinIsSupported(BuiltinInfo[i], LangOpts)) {
-      Table.get(BuiltinInfo[i].Name).setBuiltinID(i);
+  for (const auto &&[Index, I] :
+       llvm::enumerate(llvm::ArrayRef(BuiltinInfos).drop_front()))
+    if (builtinIsSupported(BuiltinStrings, I, LangOpts)) {
+      Table.get(BuiltinStrings[I.Offsets.Name]).setBuiltinID(Index + 1);
     }
 
   // Step #2: Register target-specific builtins.
-  for (unsigned i = 0, e = TSRecords.size(); i != e; ++i)
-    if (builtinIsSupported(TSRecords[i], LangOpts))
-      Table.get(TSRecords[i].Name).setBuiltinID(i + Builtin::FirstTSBuiltin);
+  for (const auto &&[Index, I] : llvm::enumerate(TSInfos))
+    if (builtinIsSupported(*TSStrTable, I, LangOpts))
+      Table.get((*TSStrTable)[I.Offsets.Name])
+          .setBuiltinID(Index + Builtin::FirstTSBuiltin);
 
   // Step #3: Register target-specific builtins for AuxTarget.
-  for (unsigned i = 0, e = AuxTSRecords.size(); i != e; ++i)
-    Table.get(AuxTSRecords[i].Name)
-        .setBuiltinID(i + Builtin::FirstTSBuiltin + TSRecords.size());
+  for (const auto &&[Index, I] : llvm::enumerate(AuxTSInfos))
+    Table.get((*AuxTSStrTable)[I.Offsets.Name])
+        .setBuiltinID(Index + Builtin::FirstTSBuiltin + TSInfos.size());
 
   // Step #4: Unregister any builtins specified by -fno-builtin-foo.
   for (llvm::StringRef Name : LangOpts.NoBuiltinFuncs) {
@@ -168,7 +207,7 @@ std::string Builtin::Context::getQuotedName(unsigned ID) const {
 }
 
 unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const {
-  const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V');
+  const char *WidthPos = ::strchr(getAttributesString(ID), 'V');
   if (!WidthPos)
     return 0;
 
@@ -191,7 +230,7 @@ bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx,
   assert(::toupper(Fmt[0]) == Fmt[1] &&
          "Format string is not in the form \"xX\"");
 
-  const char *Like = ::strpbrk(getRecord(ID).Attributes, Fmt);
+  const char *Like = ::strpbrk(getAttributesString(ID), Fmt);
   if (!Like)
     return false;
 
@@ -218,7 +257,7 @@ bool Builtin::Context::isScanfLike(unsigned ID, unsigned &FormatIdx,
 
 bool Builtin::Context::performsCallback(unsigned ID,
                                         SmallVectorImpl<int> &Encoding) const {
-  const char *CalleePos = ::strchr(getRecord(ID).Attributes, 'C');
+  const char *CalleePos = ::strchr(getAttributesString(ID), 'C');
   if (!CalleePos)
     return false;
 
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 53e102bbe44687..09754e47b08f60 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -26,35 +26,42 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#include "clang/Basic/BuiltinsNEON.def"
+static constexpr int NumBuiltins =
+    clang::AArch64::LastTSBuiltin - Builtin::FirstTSBuiltin;
 
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsNEON.def"
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsSVE.def"
-
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsSME.def"
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsAArch64.def"
+    ;
 
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LANGBUILTIN(ID, TYPE, ATTRS, LANG)                                     \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsNEON.def"
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsSVE.def"
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsSME.def"
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsAArch64.def"
-};
+});
 
 void AArch64TargetInfo::setArchFeatures() {
   if (*ArchInfo == llvm::AArch64::ARMV8R) {
@@ -697,9 +704,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
-ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo, clang::AArch64::LastTSBuiltin -
-                                         Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+AArch64TargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 std::optional<std::pair<unsigned, unsigned>>
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 68a8b1ebad8cde..1686d7d2e8e779 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -180,7 +180,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   std::optional<std::pair<unsigned, unsigned>>
   getVScaleRange(const LangOptions &LangOpts) const override;
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 99f8f2944e2796..1b256aafcf9671 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -88,13 +88,21 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
 } // namespace targets
 } // namespace clang
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsAMDGPU.def"
-};
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsAMDGPU.def"
+});
 
 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
@@ -266,9 +274,9 @@ void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
                      !isAMDGCN(getTriple()));
 }
 
-ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+AMDGPUTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index ea4189cdea47da..aac5ae8d9482c0 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -257,7 +257,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
                  StringRef CPU,
                  const std::vector<std::string> &FeatureVec) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool useFP16ConversionIntrinsics() const override { return false; }
 
diff --git a/clang/lib/Basic/Targets/ARC.h b/clang/lib/Basic/Targets/ARC.h
index 7f3d0aa15ab81f..905fdeafec4de0 100644
--- a/clang/lib/Basic/Targets/ARC.h
+++ b/clang/lib/Basic/Targets/ARC.h
@@ -40,7 +40,10 @@ class LLVM_LIBRARY_VISIBILITY ARCTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return {}; }
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 370444057b4298..03c90f745962f7 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1071,31 +1071,37 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::ARM::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsNEON.def"
 
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LANGBUILTIN(ID, TYPE, ATTRS, LANG)                                     \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsARM.def"
-};
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsNEON.def"
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsARM.def"
+});
 
-ArrayRef<Builtin::Info> ARMTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::ARM::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+ARMTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 bool ARMTargetInfo::isCLZForZeroUndef() const { return false; }
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index 55ecb99d82d8fb..60baf0f4f97122 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -196,7 +196,8 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool isCLZForZeroUndef() const override;
   BuiltinVaListKind getBuiltinVaListKind() const override;
diff --git a/clang/lib/Basic/Targets/AVR.h b/clang/lib/Basic/Targets/AVR.h
index df1f8d171efbaa..962f5add183fdb 100644
--- a/clang/lib/Basic/Targets/AVR.h
+++ b/clang/lib/Basic/Targets/AVR.h
@@ -63,7 +63,10 @@ class LLVM_LIBRARY_VISIBILITY AVRTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return {}; }
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   bool allowsLargerPreferedTypeAlignment() const override { return false; }
 
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index f4684765b7ffb3..c5efbda520998d 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -19,11 +19,19 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::BPF::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsBPF.inc"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsBPF.inc"
-};
+});
 
 void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
                                      MacroBuilder &Builder) const {
@@ -81,9 +89,9 @@ void BPFTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
   Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames));
 }
 
-ArrayRef<Builtin::Info> BPFTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::BPF::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+BPFTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 bool BPFTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
index 27a4b5f3149702..97aaf35fc523d1 100644
--- a/clang/lib/Basic/Targets/BPF.h
+++ b/clang/lib/Basic/Targets/BPF.h
@@ -58,7 +58,8 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo {
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   std::string_view getClobbers() const override { return ""; }
 
diff --git a/clang/lib/Basic/Targets/CSKY.cpp b/clang/lib/Basic/Targets/CSKY.cpp
index c8bf8b9234d243..e698508a2370c9 100644
--- a/clang/lib/Basic/Targets/CSKY.cpp
+++ b/clang/lib/Basic/Targets/CSKY.cpp
@@ -139,10 +139,6 @@ bool CSKYTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   return true;
 }
 
-ArrayRef<Builtin::Info> CSKYTargetInfo::getTargetBuiltins() const {
-  return ArrayRef<Builtin::Info>();
-}
-
 ArrayRef<const char *> CSKYTargetInfo::getGCCRegNames() const {
   static const char *const GCCRegNames[] = {
       // Integer registers
diff --git a/clang/lib/Basic/Targets/CSKY.h b/clang/lib/Basic/Targets/CSKY.h
index 94d4eeb9a1fff4..7ecc9bc780412f 100644
--- a/clang/lib/Basic/Targets/CSKY.h
+++ b/clang/lib/Basic/Targets/CSKY.h
@@ -73,7 +73,10 @@ class LLVM_LIBRARY_VISIBILITY CSKYTargetInfo : public TargetInfo {
 
   unsigned getMinGlobalAlign(uint64_t, bool HasNonWeakDef) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h
index ab22d1281a4df7..55ea3f877b23f6 100644
--- a/clang/lib/Basic/Targets/DirectX.h
+++ b/clang/lib/Basic/Targets/DirectX.h
@@ -72,7 +72,10 @@ class LLVM_LIBRARY_VISIBILITY DirectXTargetInfo : public TargetInfo {
     return Feature == "directx";
   }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return {}; }
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   std::string_view getClobbers() const override { return ""; }
 
diff --git a/clang/lib/Basic/Targets/Hexagon.cpp b/clang/lib/Basic/Targets/Hexagon.cpp
index 931327bd8657b0..5e1a7a49772016 100644
--- a/clang/lib/Basic/Targets/Hexagon.cpp
+++ b/clang/lib/Basic/Targets/Hexagon.cpp
@@ -201,15 +201,22 @@ ArrayRef<TargetInfo::GCCRegAlias> HexagonTargetInfo::getGCCRegAliases() const {
   return llvm::ArrayRef(GCCRegAliases);
 }
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::Hexagon::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsHexagon.def"
-};
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsHexagon.def"
+});
 
 bool HexagonTargetInfo::hasFeature(StringRef Feature) const {
   std::string VS = "hvxv" + HVXVersion;
@@ -267,7 +274,7 @@ void HexagonTargetInfo::fillValidCPUList(
     Values.push_back(Suffix.Name);
 }
 
-ArrayRef<Builtin::Info> HexagonTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo, clang::Hexagon::LastTSBuiltin -
-                                         Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+HexagonTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
diff --git a/clang/lib/Basic/Targets/Hexagon.h b/clang/lib/Basic/Targets/Hexagon.h
index 7f053ab7e48886..b93574aa599f4c 100644
--- a/clang/lib/Basic/Targets/Hexagon.h
+++ b/clang/lib/Basic/Targets/Hexagon.h
@@ -66,7 +66,8 @@ class LLVM_LIBRARY_VISIBILITY HexagonTargetInfo : public TargetInfo {
     BoolWidth = BoolAlign = 8;
   }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &Info) const override {
diff --git a/clang/lib/Basic/Targets/Lanai.h b/clang/lib/Basic/Targets/Lanai.h
index f7e439c7c9e1cf..e715fa220df7a1 100644
--- a/clang/lib/Basic/Targets/Lanai.h
+++ b/clang/lib/Basic/Targets/Lanai.h
@@ -78,7 +78,10 @@ class LLVM_LIBRARY_VISIBILITY LanaiTargetInfo : public TargetInfo {
     return TargetInfo::VoidPtrBuiltinVaList;
   }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return {}; }
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &info) const override {
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp
index d36186aa9c2fbf..7c0f40f6af3b5f 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -270,13 +270,21 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
 }
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::LoongArch::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsLoongArch.def"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsLoongArch.def"
-};
+});
 
 bool LoongArchTargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
@@ -303,9 +311,9 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const {
       .Default(false);
 }
 
-ArrayRef<Builtin::Info> LoongArchTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin -
-                                         Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+LoongArchTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 bool LoongArchTargetInfo::handleTargetFeatures(
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index abaa05aa42d438..dee92403dac3a3 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -70,7 +70,8 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp
index b5b29fd8675630..e2a382653a5c5d 100644
--- a/clang/lib/Basic/Targets/M68k.cpp
+++ b/clang/lib/Basic/Targets/M68k.cpp
@@ -115,9 +115,10 @@ void M68kTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__HAVE_68881__");
 }
 
-ArrayRef<Builtin::Info> M68kTargetInfo::getTargetBuiltins() const {
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+M68kTargetInfo::getTargetBuiltinStorage() const {
   // FIXME: Implement.
-  return {};
+  return {nullptr, {}};
 }
 
 bool M68kTargetInfo::hasFeature(StringRef Feature) const {
diff --git a/clang/lib/Basic/Targets/M68k.h b/clang/lib/Basic/Targets/M68k.h
index b732add77e0340..104cdfd20c1bdc 100644
--- a/clang/lib/Basic/Targets/M68k.h
+++ b/clang/lib/Basic/Targets/M68k.h
@@ -44,7 +44,8 @@ class LLVM_LIBRARY_VISIBILITY M68kTargetInfo : public TargetInfo {
 
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
   bool hasFeature(StringRef Feature) const override;
   ArrayRef<const char *> getGCCRegNames() const override;
   ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
diff --git a/clang/lib/Basic/Targets/MSP430.h b/clang/lib/Basic/Targets/MSP430.h
index 2266ada25c1dd6..a998ea0143b7f0 100644
--- a/clang/lib/Basic/Targets/MSP430.h
+++ b/clang/lib/Basic/Targets/MSP430.h
@@ -50,9 +50,10 @@ class LLVM_LIBRARY_VISIBILITY MSP430TargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override {
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
     // FIXME: Implement.
-    return {};
+    return {nullptr, {}};
   }
 
   bool allowsLargerPreferedTypeAlignment() const override { return false; }
diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp
index 174bc9d2ab9967..1edae69dbdd6a2 100644
--- a/clang/lib/Basic/Targets/Mips.cpp
+++ b/clang/lib/Basic/Targets/Mips.cpp
@@ -20,13 +20,20 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::Mips::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsMips.def"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
 #include "clang/Basic/BuiltinsMips.def"
-};
+});
 
 bool MipsTargetInfo::processorSupportsGPR64() const {
   return llvm::StringSwitch<bool>(CPU)
@@ -223,9 +230,9 @@ bool MipsTargetInfo::hasFeature(StringRef Feature) const {
       .Default(false);
 }
 
-ArrayRef<Builtin::Info> MipsTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::Mips::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+MipsTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 unsigned MipsTargetInfo::getUnwindWordWidth() const {
diff --git a/clang/lib/Basic/Targets/Mips.h b/clang/lib/Basic/Targets/Mips.h
index 8acaf56523b218..33a3b39129a923 100644
--- a/clang/lib/Basic/Targets/Mips.h
+++ b/clang/lib/Basic/Targets/Mips.h
@@ -197,7 +197,8 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool hasFeature(StringRef Feature) const override;
 
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index dbc3fec3657610..e765ff545b54e0 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -20,15 +20,22 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsNVPTX.def"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsNVPTX.def"
-};
+});
 
 const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};
 
@@ -295,7 +302,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
-ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+NVPTXTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index d81b89a7f24ac0..bd029e10039e26 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -74,7 +74,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool useFP16ConversionIntrinsics() const override { return false; }
 
diff --git a/clang/lib/Basic/Targets/PNaCl.h b/clang/lib/Basic/Targets/PNaCl.h
index 7e0e10aa362d87..89648f980f4eba 100644
--- a/clang/lib/Basic/Targets/PNaCl.h
+++ b/clang/lib/Basic/Targets/PNaCl.h
@@ -52,7 +52,10 @@ class LLVM_LIBRARY_VISIBILITY PNaClTargetInfo : public TargetInfo {
     return Feature == "pnacl";
   }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return {}; }
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::PNaClABIBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 1448069173b5f4..ab96983c3dc30c 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -19,15 +19,22 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::PPC::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsPPC.def"
-};
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#include "clang/Basic/BuiltinsPPC.def"
+});
 
 /// handleTargetFeatures - Perform initialization based on the user
 /// configured set of features.
@@ -927,9 +934,9 @@ void PPCTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
     MaxAtomicInlineWidth = 128;
 }
 
-ArrayRef<Builtin::Info> PPCTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::PPC::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+PPCTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 bool PPCTargetInfo::validateCpuSupports(StringRef FeatureStr) const {
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 3cd0fcad172939..76f4d152ae5919 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -187,7 +187,8 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
 
   StringRef getABI() const override { return ABI; }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool isCLZForZeroUndef() const override { return false; }
 
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index a541dfedc9b8e1..efa7a91ad03078 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -240,22 +240,31 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::RISCV::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsRISCVVector.def"
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsRISCV.inc"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsRISCVVector.def"
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsRISCV.inc"
-};
+});
 
-ArrayRef<Builtin::Info> RISCVTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::RISCV::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+RISCVTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 bool RISCVTargetInfo::initFeatureMap(
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index 68f10e74ba98c3..c3f62acff2623d 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -62,7 +62,8 @@ class RISCVTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp
index 040303983594f8..fd755ca056691b 100644
--- a/clang/lib/Basic/Targets/SPIR.cpp
+++ b/clang/lib/Basic/Targets/SPIR.cpp
@@ -81,8 +81,9 @@ SPIRV64AMDGCNTargetInfo::convertConstraint(const char *&Constraint) const {
   return AMDGPUTI.convertConstraint(Constraint);
 }
 
-ArrayRef<Builtin::Info> SPIRV64AMDGCNTargetInfo::getTargetBuiltins() const {
-  return AMDGPUTI.getTargetBuiltins();
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+SPIRV64AMDGCNTargetInfo::getTargetBuiltinStorage() const {
+  return AMDGPUTI.getTargetBuiltinStorage();
 }
 
 void SPIRV64AMDGCNTargetInfo::getTargetDefines(const LangOptions &Opts,
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 85e4bd920d8535..b1c5c6ea063545 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -159,7 +159,10 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
   // memcpy as per section 3 of the SPIR spec.
   bool useFP16ConversionIntrinsics() const override { return false; }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return {}; }
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   std::string_view getClobbers() const override { return ""; }
 
@@ -408,7 +411,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 
   std::string convertConstraint(const char *&Constraint) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
diff --git a/clang/lib/Basic/Targets/Sparc.h b/clang/lib/Basic/Targets/Sparc.h
index 9c529a5bc5e7fa..9836f82a2fc54a 100644
--- a/clang/lib/Basic/Targets/Sparc.h
+++ b/clang/lib/Basic/Targets/Sparc.h
@@ -48,9 +48,10 @@ class LLVM_LIBRARY_VISIBILITY SparcTargetInfo : public TargetInfo {
 
   bool hasFeature(StringRef Feature) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override {
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
     // FIXME: Implement!
-    return {};
+    return {nullptr, {}};
   }
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 06f08db2eadd47..be84329bf85d41 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -20,13 +20,21 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::SystemZ::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsSystemZ.def"
-};
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsSystemZ.def"
+});
 
 const char *const SystemZTargetInfo::GCCRegNames[] = {
     "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
@@ -170,7 +178,7 @@ void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__VEC__", "10304");
 }
 
-ArrayRef<Builtin::Info> SystemZTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo, clang::SystemZ::LastTSBuiltin -
-                                         Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+SystemZTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index e6405f174f660f..66292c206cbe41 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -99,7 +99,8 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   ArrayRef<const char *> getGCCRegNames() const override;
 
diff --git a/clang/lib/Basic/Targets/TCE.h b/clang/lib/Basic/Targets/TCE.h
index d6280b02f07b25..4f06e013d1dbd4 100644
--- a/clang/lib/Basic/Targets/TCE.h
+++ b/clang/lib/Basic/Targets/TCE.h
@@ -95,7 +95,10 @@ class LLVM_LIBRARY_VISIBILITY TCETargetInfo : public TargetInfo {
 
   bool hasFeature(StringRef Feature) const override { return Feature == "tce"; }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override { return {}; }
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override {
+    return {nullptr, {}};
+  }
 
   std::string_view getClobbers() const override { return ""; }
 
diff --git a/clang/lib/Basic/Targets/VE.cpp b/clang/lib/Basic/Targets/VE.cpp
index 67cae8faf60522..a955767f46599a 100644
--- a/clang/lib/Basic/Targets/VE.cpp
+++ b/clang/lib/Basic/Targets/VE.cpp
@@ -18,11 +18,19 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::VE::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsVE.def"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsVE.def"
-};
+});
 
 void VETargetInfo::getTargetDefines(const LangOptions &Opts,
                                     MacroBuilder &Builder) const {
@@ -39,7 +47,7 @@ void VETargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
 }
 
-ArrayRef<Builtin::Info> VETargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::VE::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+VETargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
diff --git a/clang/lib/Basic/Targets/VE.h b/clang/lib/Basic/Targets/VE.h
index 7e8fdf6096ef23..69621023acff00 100644
--- a/clang/lib/Basic/Targets/VE.h
+++ b/clang/lib/Basic/Targets/VE.h
@@ -55,7 +55,8 @@ class LLVM_LIBRARY_VISIBILITY VETargetInfo : public TargetInfo {
 
   bool hasSjLjLowering() const override { return true; }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index 7b0fd0c841ba23..4282b1496333ba 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -20,15 +20,22 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    clang::WebAssembly::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsWebAssembly.def"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
 #include "clang/Basic/BuiltinsWebAssembly.def"
-};
+});
 
 static constexpr llvm::StringLiteral ValidCPUNames[] = {
     {"mvp"}, {"bleeding-edge"}, {"generic"}, {"lime1"}};
@@ -360,9 +367,9 @@ bool WebAssemblyTargetInfo::handleTargetFeatures(
   return true;
 }
 
-ArrayRef<Builtin::Info> WebAssemblyTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo, clang::WebAssembly::LastTSBuiltin -
-                                         Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+WebAssemblyTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
 
 void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags,
diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h
index 0a14da6a277b8e..a67bf5e3733088 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -120,7 +120,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo {
 
   bool setCPU(const std::string &Name) final { return isValidCPUName(Name); }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const final;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const final;
 
   BuiltinVaListKind getBuiltinVaListKind() const final {
     return VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index d2d92fb864c310..4ed8bcec94a5be 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -23,23 +23,37 @@
 namespace clang {
 namespace targets {
 
-static constexpr Builtin::Info BuiltinInfoX86[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
+// The x86-32 builtins are a subset and prefix of the x86-64 builtins.
+static constexpr int NumX86Builtins =
+    X86::LastX86CommonBuiltin - Builtin::FirstTSBuiltin + 1;
+static constexpr int NumX86_64Builtins =
+    X86::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static_assert(NumX86Builtins < NumX86_64Builtins);
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsX86.inc"
 
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
-  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsX86_64.inc"
-};
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumX86_64Builtins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsX86.inc"
+
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsX86_64.inc"
+});
 
 static const char *const GCCRegNames[] = {
     "ax",    "dx",    "cx",    "bx",    "si",      "di",    "bp",    "sp",
@@ -1856,12 +1870,14 @@ ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const {
   return llvm::ArrayRef(AddlRegNames);
 }
 
-ArrayRef<Builtin::Info> X86_32TargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin -
-                                            Builtin::FirstTSBuiltin + 1);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+X86_32TargetInfo::getTargetBuiltinStorage() const {
+  // Only use the relevant prefix of the infos, the string table base is common.
+  return {&BuiltinStrings,
+          llvm::ArrayRef(BuiltinInfos).take_front(NumX86Builtins)};
 }
 
-ArrayRef<Builtin::Info> X86_64TargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfoX86,
-                        X86::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+X86_64TargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 3ed36c8fa724b5..5a2a546b1038bb 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -508,7 +508,8 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo {
       MaxAtomicInlineWidth = 64;
   }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool hasBitIntType() const override { return true; }
   size_t getMaxBitIntWidth() const override {
@@ -812,7 +813,8 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
       MaxAtomicInlineWidth = 128;
   }
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   bool hasBitIntType() const override { return true; }
   size_t getMaxBitIntWidth() const override {
diff --git a/clang/lib/Basic/Targets/XCore.cpp b/clang/lib/Basic/Targets/XCore.cpp
index fd377bbfb90e16..334e853a48a12a 100644
--- a/clang/lib/Basic/Targets/XCore.cpp
+++ b/clang/lib/Basic/Targets/XCore.cpp
@@ -18,13 +18,20 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr Builtin::Info BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
+static constexpr int NumBuiltins =
+    XCore::LastTSBuiltin - Builtin::FirstTSBuiltin;
+
+static constexpr llvm::StringTable BuiltinStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsXCore.def"
+    ;
+
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
 #include "clang/Basic/BuiltinsXCore.def"
-};
+});
 
 void XCoreTargetInfo::getTargetDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
@@ -32,7 +39,7 @@ void XCoreTargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__XS1B__");
 }
 
-ArrayRef<Builtin::Info> XCoreTargetInfo::getTargetBuiltins() const {
-  return llvm::ArrayRef(BuiltinInfo,
-                        clang::XCore::LastTSBuiltin - Builtin::FirstTSBuiltin);
+std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+XCoreTargetInfo::getTargetBuiltinStorage() const {
+  return {&BuiltinStrings, BuiltinInfos};
 }
diff --git a/clang/lib/Basic/Targets/XCore.h b/clang/lib/Basic/Targets/XCore.h
index 84fd59d1a71e49..3f31095fcef297 100644
--- a/clang/lib/Basic/Targets/XCore.h
+++ b/clang/lib/Basic/Targets/XCore.h
@@ -43,7 +43,8 @@ class LLVM_LIBRARY_VISIBILITY XCoreTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
+  getTargetBuiltinStorage() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;

>From 50705cd71a95587d558f7623ef4457057dd1d79c Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Sun, 15 Dec 2024 08:15:18 +0000
Subject: [PATCH 05/10] Restructure builtin structures to support sharding

Once builtins use a string table model for their strings, those string
tables become extremely large. However, many targets have a
fundamentally sharded structure to their builtins: there are often
pre-existing, reasonable sharding structures that will let us divide the
tables up and represent them more efficiently.

This PR establishes a sharded structure in the API design and ensures it
is effective by using it rather than merging tables in AArch64,
LoongArch, RISCV, and X86. This can help reduce the scaling challenges
of the large tables used across various targets including these.

However, just this PR in isolation doesn't help the scaling as much as
it might seem. The largest parts of the largest targets are currently
single shards here.

The other big benefit is that by switching to a sharded architecture, we
can start to introduce different construction or even representations
for the shards. All the different sources and structures driving
builtins make it hard to specialize any of them in a way that improves
scaling, but with a sharded model that should become much more
attainable.
---
 clang/include/clang/Basic/Builtins.h          |  50 ++++---
 .../include/clang/Basic/BuiltinsLoongArch.def |  28 ----
 clang/include/clang/Basic/TargetBuiltins.h    |  12 +-
 clang/include/clang/Basic/TargetInfo.h        |   3 +-
 clang/include/module.modulemap                |   1 -
 clang/lib/Basic/Builtins.cpp                  | 132 +++++++++++-------
 clang/lib/Basic/Targets/AArch64.cpp           |  44 +++++-
 clang/lib/Basic/Targets/AArch64.h             |   3 +-
 clang/lib/Basic/Targets/AMDGPU.cpp            |   6 +-
 clang/lib/Basic/Targets/AMDGPU.h              |   3 +-
 clang/lib/Basic/Targets/ARC.h                 |   5 +-
 clang/lib/Basic/Targets/ARM.cpp               |   6 +-
 clang/lib/Basic/Targets/ARM.h                 |   3 +-
 clang/lib/Basic/Targets/AVR.h                 |   5 +-
 clang/lib/Basic/Targets/BPF.cpp               |   6 +-
 clang/lib/Basic/Targets/BPF.h                 |   3 +-
 clang/lib/Basic/Targets/CSKY.h                |   5 +-
 clang/lib/Basic/Targets/DirectX.h             |   5 +-
 clang/lib/Basic/Targets/Hexagon.cpp           |   6 +-
 clang/lib/Basic/Targets/Hexagon.h             |   3 +-
 clang/lib/Basic/Targets/Lanai.h               |   5 +-
 clang/lib/Basic/Targets/LoongArch.cpp         |  58 ++++++--
 clang/lib/Basic/Targets/LoongArch.h           |   3 +-
 clang/lib/Basic/Targets/M68k.cpp              |   6 +-
 clang/lib/Basic/Targets/M68k.h                |   3 +-
 clang/lib/Basic/Targets/MSP430.h              |   5 +-
 clang/lib/Basic/Targets/Mips.cpp              |   6 +-
 clang/lib/Basic/Targets/Mips.h                |   3 +-
 clang/lib/Basic/Targets/NVPTX.cpp             |   6 +-
 clang/lib/Basic/Targets/NVPTX.h               |   3 +-
 clang/lib/Basic/Targets/PNaCl.h               |   5 +-
 clang/lib/Basic/Targets/PPC.cpp               |   6 +-
 clang/lib/Basic/Targets/PPC.h                 |   3 +-
 clang/lib/Basic/Targets/RISCV.cpp             |  23 ++-
 clang/lib/Basic/Targets/RISCV.h               |   3 +-
 clang/lib/Basic/Targets/SPIR.cpp              |   6 +-
 clang/lib/Basic/Targets/SPIR.h                |   8 +-
 clang/lib/Basic/Targets/Sparc.h               |   5 +-
 clang/lib/Basic/Targets/SystemZ.cpp           |   6 +-
 clang/lib/Basic/Targets/SystemZ.h             |   3 +-
 clang/lib/Basic/Targets/TCE.h                 |   5 +-
 clang/lib/Basic/Targets/VE.cpp                |   5 +-
 clang/lib/Basic/Targets/VE.h                  |   3 +-
 clang/lib/Basic/Targets/WebAssembly.cpp       |   6 +-
 clang/lib/Basic/Targets/WebAssembly.h         |   3 +-
 clang/lib/Basic/Targets/X86.cpp               |  34 +++--
 clang/lib/Basic/Targets/X86.h                 |   6 +-
 clang/lib/Basic/Targets/XCore.cpp             |   6 +-
 clang/lib/Basic/Targets/XCore.h               |   3 +-
 49 files changed, 322 insertions(+), 244 deletions(-)
 delete mode 100644 clang/include/clang/Basic/BuiltinsLoongArch.def

diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h
index 8c6b604b0bfe0f..a03442e8e29194 100644
--- a/clang/include/clang/Basic/Builtins.h
+++ b/clang/include/clang/Basic/Builtins.h
@@ -70,7 +70,7 @@ enum ID {
   FirstTSBuiltin
 };
 
-// The info used to represent each builtin.
+/// The info used to represent each builtin.
 struct Info {
   // Rather than store pointers to the string literals describing these four
   // aspects of builtins, we store offsets into a common string table.
@@ -85,13 +85,13 @@ struct Info {
   LanguageID Langs;
 };
 
-// A constexpr function to construct an infos array from X-macros.
-//
-// The input array uses the same data structure, but the offsets are actually
-// _lengths_ when input. This is all we can compute from the X-macro approach to
-// builtins. This function will convert these lengths into actual offsets to a
-// string table built up through sequentially appending strings with the given
-// lengths.
+/// A constexpr function to construct an infos array from X-macros.
+///
+/// The input array uses the same data structure, but the offsets are actually
+/// _lengths_ when input. This is all we can compute from the X-macro approach
+/// to builtins. This function will convert these lengths into actual offsets to
+/// a string table built up through sequentially appending strings with the
+/// given lengths.
 template <size_t N>
 static constexpr std::array<Info, N> MakeInfos(std::array<Info, N> Infos) {
   // Translate lengths to offsets. We start past the initial empty string at
@@ -112,6 +112,16 @@ static constexpr std::array<Info, N> MakeInfos(std::array<Info, N> Infos) {
   return Infos;
 }
 
+/// A shard of a target's builtins string table and info.
+///
+/// Target builtins are sharded across multiple tables due to different
+/// structures, origins, and also to improve the overall scaling by avoiding a
+/// single table across all builtins.
+struct InfosShard {
+  const llvm::StringTable *Strings;
+  llvm::ArrayRef<Info> Infos;
+};
+
 // A detail macro used below to emit a string literal that, after string literal
 // concatenation, ends up triggering the `-Woverlength-strings` warning. While
 // the warning is useful in general to catch accidentally excessive strings,
@@ -203,14 +213,16 @@ static constexpr std::array<Info, N> MakeInfos(std::array<Info, N> Infos) {
 /// AuxTSRecords. Their IDs are shifted up by TSRecords.size() and need to
 /// be translated back with getAuxBuiltinID() before use.
 class Context {
-  const llvm::StringTable *TSStrTable = nullptr;
-  const llvm::StringTable *AuxTSStrTable = nullptr;
+  llvm::SmallVector<InfosShard> BuiltinShards;
+
+  llvm::SmallVector<InfosShard> TargetShards;
+  llvm::SmallVector<InfosShard> AuxTargetShards;
 
-  llvm::ArrayRef<Info> TSInfos;
-  llvm::ArrayRef<Info> AuxTSInfos;
+  unsigned NumTargetBuiltins = 0;
+  unsigned NumAuxTargetBuiltins = 0;
 
 public:
-  Context() = default;
+  Context();
 
   /// Perform target-specific initialization
   /// \param AuxTarget Target info to incorporate builtins from. May be nullptr.
@@ -385,12 +397,12 @@ class Context {
 
   /// Return true if builtin ID belongs to AuxTarget.
   bool isAuxBuiltinID(unsigned ID) const {
-    return ID >= (Builtin::FirstTSBuiltin + TSInfos.size());
+    return ID >= (Builtin::FirstTSBuiltin + NumTargetBuiltins);
   }
 
   /// Return real builtin ID (i.e. ID it would have during compilation
   /// for AuxTarget).
-  unsigned getAuxBuiltinID(unsigned ID) const { return ID - TSInfos.size(); }
+  unsigned getAuxBuiltinID(unsigned ID) const { return ID - NumTargetBuiltins; }
 
   /// Returns true if this is a libc/libm function without the '__builtin_'
   /// prefix.
@@ -411,12 +423,10 @@ class Context {
   }
 
 private:
-  std::pair<const llvm::StringTable &, const Info &>
-  getStrTableAndInfo(unsigned ID) const;
+  std::pair<const InfosShard &, const Info &>
+  getShardAndInfo(unsigned ID) const;
 
-  const Info &getInfo(unsigned ID) const {
-    return getStrTableAndInfo(ID).second;
-  }
+  const Info &getInfo(unsigned ID) const { return getShardAndInfo(ID).second; }
 
   /// Helper function for isPrintfLike and isScanfLike.
   bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg,
diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def
deleted file mode 100644
index 95359a3fdc711d..00000000000000
--- a/clang/include/clang/Basic/BuiltinsLoongArch.def
+++ /dev/null
@@ -1,28 +0,0 @@
-//==- BuiltinsLoongArch.def - LoongArch Builtin function database -- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the LoongArch-specific builtin function database.  Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-//===----------------------------------------------------------------------===//
-
-#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
-#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-// Definition of LoongArch basic builtins.
-#include "clang/Basic/BuiltinsLoongArchBase.def"
-
-// Definition of LSX builtins.
-#include "clang/Basic/BuiltinsLoongArchLSX.def"
-
-// Definition of LASX builtins.
-#include "clang/Basic/BuiltinsLoongArchLASX.def"
-
-#undef BUILTIN
-#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 914be3691ee812..f34e4241b26842 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -168,8 +168,16 @@ namespace clang {
   namespace LoongArch {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-#include "clang/Basic/BuiltinsLoongArch.def"
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
+#include "clang/Basic/BuiltinsLoongArchBase.def"
+    FirstLSXBuiltin,
+    LastBaseBuiltin = FirstLSXBuiltin - 1,
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
+#include "clang/Basic/BuiltinsLoongArchLSX.def"
+    FirstLASXBuiltin,
+    LastLSXBuiltin = FirstLASXBuiltin - 1,
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
+#include "clang/Basic/BuiltinsLoongArchLASX.def"
     LastTSBuiltin
   };
   } // namespace LoongArch
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 0b9d18362d0a00..b07f311f179c47 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1014,8 +1014,7 @@ class TargetInfo : public TransferrableTargetInfo,
   /// Return information about target-specific builtins for the current primary
   /// target, and info about which builtins are non-portable across the current
   /// set of primary and secondary targets.
-  virtual std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const = 0;
+  virtual llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const = 0;
 
   /// Returns target-specific min and max values VScale_Range.
   virtual std::optional<std::pair<unsigned, unsigned>>
diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap
index 5bb9f6b7a91f67..34b97c220e7f6b 100644
--- a/clang/include/module.modulemap
+++ b/clang/include/module.modulemap
@@ -47,7 +47,6 @@ module Clang_Basic {
   textual header "clang/Basic/BuiltinsHexagon.def"
   textual header "clang/Basic/BuiltinsHexagonDep.def"
   textual header "clang/Basic/BuiltinsHexagonMapCustomDep.def"
-  textual header "clang/Basic/BuiltinsLoongArch.def"
   textual header "clang/Basic/BuiltinsLoongArchBase.def"
   textual header "clang/Basic/BuiltinsLoongArchLASX.def"
   textual header "clang/Basic/BuiltinsLoongArchLSX.def"
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 58fd212f9ddf17..52375a3663f39e 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -36,6 +36,7 @@ static constexpr llvm::StringTable BuiltinStrings =
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #include "clang/Basic/Builtins.inc"
     ;
+static_assert(BuiltinStrings.size() < 100'000);
 
 static constexpr auto BuiltinInfos =
     Builtin::MakeInfos<Builtin::FirstTSBuiltin>(
@@ -46,71 +47,90 @@ static constexpr auto BuiltinInfos =
 #include "clang/Basic/Builtins.inc"
         });
 
-std::pair<const llvm::StringTable &, const Builtin::Info &>
-Builtin::Context::getStrTableAndInfo(unsigned ID) const {
-  if (ID < Builtin::FirstTSBuiltin)
-    return {BuiltinStrings, BuiltinInfos[ID]};
-  assert(
-      ((ID - Builtin::FirstTSBuiltin) < (TSInfos.size() + AuxTSInfos.size())) &&
-      "Invalid builtin ID!");
-  if (isAuxBuiltinID(ID))
-    return {*AuxTSStrTable,
-            AuxTSInfos[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin]};
-  return {*TSStrTable, TSInfos[ID - Builtin::FirstTSBuiltin]};
+std::pair<const Builtin::InfosShard &, const Builtin::Info &>
+Builtin::Context::getShardAndInfo(unsigned ID) const {
+  assert((ID < (Builtin::FirstTSBuiltin + NumTargetBuiltins +
+                NumAuxTargetBuiltins)) &&
+         "Invalid builtin ID!");
+
+  ArrayRef<InfosShard> Shards = BuiltinShards;
+  if (isAuxBuiltinID(ID)) {
+    Shards = AuxTargetShards;
+    ID = getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin;
+  } else if (ID >= Builtin::FirstTSBuiltin) {
+    Shards = TargetShards;
+    ID -= Builtin::FirstTSBuiltin;
+  }
+
+  // Loop over the shards to find the one matching this ID. We don't expect to
+  // have many shards and so it's better to search linearly than with a binary
+  // search.
+  for (const auto &Shard : Shards) {
+    if (ID < Shard.Infos.size()) {
+      return {Shard, Shard.Infos[ID]};
+    }
+
+    ID -= Shard.Infos.size();
+  }
+  llvm_unreachable("Invalid target builtin shard structure!");
 }
 
 /// Return the identifier name for the specified builtin,
 /// e.g. "__builtin_abs".
 llvm::StringRef Builtin::Context::getName(unsigned ID) const {
-  const auto &[StrTable, I] = getStrTableAndInfo(ID);
-  return StrTable[I.Offsets.Name];
+  const auto &[Shard, I] = getShardAndInfo(ID);
+  return (*Shard.Strings)[I.Offsets.Name];
 }
 
 const char *Builtin::Context::getTypeString(unsigned ID) const {
-  const auto &[StrTable, I] = getStrTableAndInfo(ID);
-  return StrTable[I.Offsets.Type].data();
+  const auto &[Shard, I] = getShardAndInfo(ID);
+  return (*Shard.Strings)[I.Offsets.Type].data();
 }
 
 const char *Builtin::Context::getAttributesString(unsigned ID) const {
-  const auto &[StrTable, I] = getStrTableAndInfo(ID);
-  return StrTable[I.Offsets.Attributes].data();
+  const auto &[Shard, I] = getShardAndInfo(ID);
+  return (*Shard.Strings)[I.Offsets.Attributes].data();
 }
 
 const char *Builtin::Context::getRequiredFeatures(unsigned ID) const {
-  const auto &[StrTable, I] = getStrTableAndInfo(ID);
-  return StrTable[I.Offsets.Features].data();
+  const auto &[Shard, I] = getShardAndInfo(ID);
+  return (*Shard.Strings)[I.Offsets.Features].data();
 }
 
+Builtin::Context::Context() : BuiltinShards{{&BuiltinStrings, BuiltinInfos}} {}
+
 void Builtin::Context::InitializeTarget(const TargetInfo &Target,
                                         const TargetInfo *AuxTarget) {
-  assert(TSStrTable == nullptr && "Already initialized target?");
-  assert(TSInfos.empty() && "Already initialized target?");
-  std::tie(TSStrTable, TSInfos) = Target.getTargetBuiltinStorage();
+  assert(TargetShards.empty() && "Already initialized target?");
+  assert(NumTargetBuiltins == 0 && "Already initialized target?");
+  TargetShards = Target.getTargetBuiltins();
+  for (const auto &Shard : TargetShards)
+    NumTargetBuiltins += Shard.Infos.size();
   if (AuxTarget) {
-    std::tie(AuxTSStrTable, AuxTSInfos) = AuxTarget->getTargetBuiltinStorage();
+    AuxTargetShards = AuxTarget->getTargetBuiltins();
+    for (const auto &Shard : AuxTargetShards)
+      NumAuxTargetBuiltins += Shard.Infos.size();
   }
 }
 
 bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) {
   bool InStdNamespace = FuncName.consume_front("std-");
-  const llvm::StringTable &StrTable = BuiltinStrings;
-  for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin;
-       ++i) {
-    const auto &I = BuiltinInfos[i];
-    if (FuncName == StrTable[I.Offsets.Name] &&
-        (bool)strchr(StrTable[I.Offsets.Attributes].data(), 'z') ==
-            InStdNamespace)
-      return strchr(StrTable[I.Offsets.Attributes].data(), 'f') != nullptr;
-  }
+  for (const auto &Shard : {InfosShard{&BuiltinStrings, BuiltinInfos}})
+    for (const auto &I : Shard.Infos)
+      if (FuncName == (*Shard.Strings)[I.Offsets.Name] &&
+          (bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') ==
+              InStdNamespace)
+        return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') !=
+               nullptr;
 
   return false;
 }
 
 /// Is this builtin supported according to the given language options?
-static bool builtinIsSupported(const llvm::StringTable &StrTable,
+static bool builtinIsSupported(const llvm::StringTable &Strings,
                                const Builtin::Info &BuiltinInfo,
                                const LangOptions &LangOpts) {
-  auto AttributesStr = StrTable[BuiltinInfo.Offsets.Attributes];
+  auto AttributesStr = Strings[BuiltinInfo.Offsets.Attributes];
 
   /* Builtins Unsupported */
   if (LangOpts.NoBuiltin && strchr(AttributesStr.data(), 'f') != nullptr)
@@ -169,24 +189,34 @@ static bool builtinIsSupported(const llvm::StringTable &StrTable,
 /// appropriate builtin ID # and mark any non-portable builtin identifiers as
 /// such.
 void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
-                                          const LangOptions& LangOpts) {
-  // Step #1: mark all target-independent builtins with their ID's.
-  for (const auto &&[Index, I] :
-       llvm::enumerate(llvm::ArrayRef(BuiltinInfos).drop_front()))
-    if (builtinIsSupported(BuiltinStrings, I, LangOpts)) {
-      Table.get(BuiltinStrings[I.Offsets.Name]).setBuiltinID(Index + 1);
-    }
-
-  // Step #2: Register target-specific builtins.
-  for (const auto &&[Index, I] : llvm::enumerate(TSInfos))
-    if (builtinIsSupported(*TSStrTable, I, LangOpts))
-      Table.get((*TSStrTable)[I.Offsets.Name])
-          .setBuiltinID(Index + Builtin::FirstTSBuiltin);
+                                          const LangOptions &LangOpts) {
+  {
+    unsigned ID = 0;
+    // Step #1: mark all target-independent builtins with their IDs.
+    for (const auto &Shard : BuiltinShards)
+      for (const auto &I : Shard.Infos) {
+        // If this is a real builtin (ID != 0) and is supported, add it.
+        if (ID != 0 && builtinIsSupported(*Shard.Strings, I, LangOpts))
+          Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
+        ++ID;
+      }
+    assert(ID == FirstTSBuiltin && "Should have added all non-target IDs!");
+
+    // Step #2: Register target-specific builtins.
+    for (const auto &Shard : TargetShards)
+      for (const auto &I : Shard.Infos) {
+        if (builtinIsSupported(*Shard.Strings, I, LangOpts))
+          Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
+        ++ID;
+      }
 
-  // Step #3: Register target-specific builtins for AuxTarget.
-  for (const auto &&[Index, I] : llvm::enumerate(AuxTSInfos))
-    Table.get((*AuxTSStrTable)[I.Offsets.Name])
-        .setBuiltinID(Index + Builtin::FirstTSBuiltin + TSInfos.size());
+    // Step #3: Register target-specific builtins for AuxTarget.
+    for (const auto &Shard : AuxTargetShards)
+      for (const auto &I : Shard.Infos) {
+        Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
+        ++ID;
+      }
+  }
 
   // Step #4: Unregister any builtins specified by -fno-builtin-foo.
   for (llvm::StringRef Name : LangOpts.NoBuiltinFuncs) {
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 09754e47b08f60..04697deb50be79 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -26,42 +26,67 @@
 using namespace clang;
 using namespace clang::targets;
 
+static constexpr int NumNEONBuiltins =
+    NEON::FirstTSBuiltin - Builtin::FirstTSBuiltin;
+static constexpr int NumSVEBuiltins =
+    SVE::FirstTSBuiltin - NEON::FirstTSBuiltin;
+static constexpr int NumSMEBuiltins = SME::FirstTSBuiltin - SVE::FirstTSBuiltin;
+static constexpr int NumAArch64Builtins =
+    AArch64::LastTSBuiltin - SME::FirstTSBuiltin;
 static constexpr int NumBuiltins =
-    clang::AArch64::LastTSBuiltin - Builtin::FirstTSBuiltin;
+    AArch64::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static_assert(NumBuiltins == (NumNEONBuiltins + NumSVEBuiltins +
+                              NumSMEBuiltins + NumAArch64Builtins));
 
-static constexpr llvm::StringTable BuiltinStrings =
+static constexpr llvm::StringTable BuiltinNEONStrings =
     CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsNEON.def"
+    ;
+static constexpr llvm::StringTable BuiltinSVEStrings =
+    CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsSVE.def"
+    ;
+static constexpr llvm::StringTable BuiltinSMEStrings =
+    CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsSME.def"
+    ;
+static constexpr llvm::StringTable BuiltinAArch64Strings =
+    CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsAArch64.def"
     ;
 
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+static constexpr auto BuiltinNEONInfos = Builtin::MakeInfos<NumNEONBuiltins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsNEON.def"
+});
+static constexpr auto BuiltinSVEInfos = Builtin::MakeInfos<NumSVEBuiltins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsSVE.def"
+});
+static constexpr auto BuiltinSMEInfos = Builtin::MakeInfos<NumSMEBuiltins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsSME.def"
+});
+static constexpr auto BuiltinAArch64Infos =
+    Builtin::MakeInfos<NumAArch64Builtins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
 #define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsAArch64.def"
-});
+    });
 
 void AArch64TargetInfo::setArchFeatures() {
   if (*ArchInfo == llvm::AArch64::ARMV8R) {
@@ -704,9 +729,14 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-AArch64TargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+AArch64TargetInfo::getTargetBuiltins() const {
+  return {
+      {&BuiltinNEONStrings, BuiltinNEONInfos},
+      {&BuiltinSVEStrings, BuiltinSVEInfos},
+      {&BuiltinSMEStrings, BuiltinSMEInfos},
+      {&BuiltinAArch64Strings, BuiltinAArch64Infos},
+  };
 }
 
 std::optional<std::pair<unsigned, unsigned>>
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 1686d7d2e8e779..b70f080041f741 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -180,8 +180,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   std::optional<std::pair<unsigned, unsigned>>
   getVScaleRange(const LangOptions &LangOpts) const override;
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 1b256aafcf9671..ecfc4e8b35e7c5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -274,9 +274,9 @@ void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
                      !isAMDGCN(getTriple()));
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-AMDGPUTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+AMDGPUTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
 
 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index aac5ae8d9482c0..3d6778fb5a76fb 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -257,8 +257,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
                  StringRef CPU,
                  const std::vector<std::string> &FeatureVec) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool useFP16ConversionIntrinsics() const override { return false; }
 
diff --git a/clang/lib/Basic/Targets/ARC.h b/clang/lib/Basic/Targets/ARC.h
index 905fdeafec4de0..2b69f95591fa10 100644
--- a/clang/lib/Basic/Targets/ARC.h
+++ b/clang/lib/Basic/Targets/ARC.h
@@ -40,9 +40,8 @@ class LLVM_LIBRARY_VISIBILITY ARCTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 03c90f745962f7..d111443d04409a 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1099,9 +1099,9 @@ static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
 #include "clang/Basic/BuiltinsARM.def"
 });
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-ARMTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+ARMTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
 
 bool ARMTargetInfo::isCLZForZeroUndef() const { return false; }
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index 60baf0f4f97122..2c5577c123b309 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -196,8 +196,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool isCLZForZeroUndef() const override;
   BuiltinVaListKind getBuiltinVaListKind() const override;
diff --git a/clang/lib/Basic/Targets/AVR.h b/clang/lib/Basic/Targets/AVR.h
index 962f5add183fdb..2117ab58e6f303 100644
--- a/clang/lib/Basic/Targets/AVR.h
+++ b/clang/lib/Basic/Targets/AVR.h
@@ -63,9 +63,8 @@ class LLVM_LIBRARY_VISIBILITY AVRTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   bool allowsLargerPreferedTypeAlignment() const override { return false; }
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index c5efbda520998d..b4504faa4d5eeb 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -89,9 +89,9 @@ void BPFTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
   Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames));
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-BPFTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+BPFTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
 
 bool BPFTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
index 97aaf35fc523d1..d1f68b842348ea 100644
--- a/clang/lib/Basic/Targets/BPF.h
+++ b/clang/lib/Basic/Targets/BPF.h
@@ -58,8 +58,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo {
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   std::string_view getClobbers() const override { return ""; }
 
diff --git a/clang/lib/Basic/Targets/CSKY.h b/clang/lib/Basic/Targets/CSKY.h
index 7ecc9bc780412f..ddfbe4794daadd 100644
--- a/clang/lib/Basic/Targets/CSKY.h
+++ b/clang/lib/Basic/Targets/CSKY.h
@@ -73,9 +73,8 @@ class LLVM_LIBRARY_VISIBILITY CSKYTargetInfo : public TargetInfo {
 
   unsigned getMinGlobalAlign(uint64_t, bool HasNonWeakDef) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h
index 55ea3f877b23f6..9ab387ebc8d41c 100644
--- a/clang/lib/Basic/Targets/DirectX.h
+++ b/clang/lib/Basic/Targets/DirectX.h
@@ -72,9 +72,8 @@ class LLVM_LIBRARY_VISIBILITY DirectXTargetInfo : public TargetInfo {
     return Feature == "directx";
   }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   std::string_view getClobbers() const override { return ""; }
diff --git a/clang/lib/Basic/Targets/Hexagon.cpp b/clang/lib/Basic/Targets/Hexagon.cpp
index 5e1a7a49772016..12f9967eac4525 100644
--- a/clang/lib/Basic/Targets/Hexagon.cpp
+++ b/clang/lib/Basic/Targets/Hexagon.cpp
@@ -274,7 +274,7 @@ void HexagonTargetInfo::fillValidCPUList(
     Values.push_back(Suffix.Name);
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-HexagonTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+HexagonTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
diff --git a/clang/lib/Basic/Targets/Hexagon.h b/clang/lib/Basic/Targets/Hexagon.h
index b93574aa599f4c..a65663ca09eee7 100644
--- a/clang/lib/Basic/Targets/Hexagon.h
+++ b/clang/lib/Basic/Targets/Hexagon.h
@@ -66,8 +66,7 @@ class LLVM_LIBRARY_VISIBILITY HexagonTargetInfo : public TargetInfo {
     BoolWidth = BoolAlign = 8;
   }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &Info) const override {
diff --git a/clang/lib/Basic/Targets/Lanai.h b/clang/lib/Basic/Targets/Lanai.h
index e715fa220df7a1..e32ef9d7d40daa 100644
--- a/clang/lib/Basic/Targets/Lanai.h
+++ b/clang/lib/Basic/Targets/Lanai.h
@@ -78,9 +78,8 @@ class LLVM_LIBRARY_VISIBILITY LanaiTargetInfo : public TargetInfo {
     return TargetInfo::VoidPtrBuiltinVaList;
   }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   bool validateAsmConstraint(const char *&Name,
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp
index 7c0f40f6af3b5f..95111756d3999f 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -270,20 +270,54 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
 }
 
+static constexpr int NumBaseBuiltins =
+    LoongArch::FirstLSXBuiltin - Builtin::FirstTSBuiltin;
+static constexpr int NumLSXBuiltins =
+    LoongArch::FirstLASXBuiltin - LoongArch::FirstLSXBuiltin;
+static constexpr int NumLASXBuiltins =
+    LoongArch::LastTSBuiltin - LoongArch::FirstLASXBuiltin;
 static constexpr int NumBuiltins =
-    clang::LoongArch::LastTSBuiltin - Builtin::FirstTSBuiltin;
+    LoongArch::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static_assert(NumBuiltins ==
+              (NumBaseBuiltins + NumLSXBuiltins + NumLASXBuiltins));
 
-static constexpr llvm::StringTable BuiltinStrings =
+static constexpr llvm::StringTable BuiltinBaseStrings =
     CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsLoongArch.def"
+#include "clang/Basic/BuiltinsLoongArchBase.def"
+#undef TARGET_BUILTIN
     ;
 
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
+static constexpr auto BuiltinBaseInfos = Builtin::MakeInfos<NumBaseBuiltins>({
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#include "clang/Basic/BuiltinsLoongArch.def"
+#include "clang/Basic/BuiltinsLoongArchBase.def"
+#undef TARGET_BUILTIN
+});
+
+static constexpr llvm::StringTable BuiltinLSXStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsLoongArchLSX.def"
+#undef TARGET_BUILTIN
+    ;
+
+static constexpr auto BuiltinLSXInfos = Builtin::MakeInfos<NumLSXBuiltins>({
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsLoongArchLSX.def"
+#undef TARGET_BUILTIN
+});
+
+static constexpr llvm::StringTable BuiltinLASXStrings =
+    CLANG_BUILTIN_STR_TABLE_START
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsLoongArchLASX.def"
+#undef TARGET_BUILTIN
+    ;
+
+static constexpr auto BuiltinLASXInfos = Builtin::MakeInfos<NumLASXBuiltins>({
+#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+#include "clang/Basic/BuiltinsLoongArchLASX.def"
+#undef TARGET_BUILTIN
 });
 
 bool LoongArchTargetInfo::initFeatureMap(
@@ -311,9 +345,13 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const {
       .Default(false);
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-LoongArchTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+LoongArchTargetInfo::getTargetBuiltins() const {
+  return {
+      {&BuiltinBaseStrings, BuiltinBaseInfos},
+      {&BuiltinLSXStrings, BuiltinLSXInfos},
+      {&BuiltinLASXStrings, BuiltinLASXInfos},
+  };
 }
 
 bool LoongArchTargetInfo::handleTargetFeatures(
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index dee92403dac3a3..89eb8e3861ed77 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -70,8 +70,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp
index e2a382653a5c5d..e5b7f06829cd91 100644
--- a/clang/lib/Basic/Targets/M68k.cpp
+++ b/clang/lib/Basic/Targets/M68k.cpp
@@ -115,10 +115,10 @@ void M68kTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__HAVE_68881__");
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-M68kTargetInfo::getTargetBuiltinStorage() const {
+llvm::SmallVector<Builtin::InfosShard>
+M68kTargetInfo::getTargetBuiltins() const {
   // FIXME: Implement.
-  return {nullptr, {}};
+  return {};
 }
 
 bool M68kTargetInfo::hasFeature(StringRef Feature) const {
diff --git a/clang/lib/Basic/Targets/M68k.h b/clang/lib/Basic/Targets/M68k.h
index 104cdfd20c1bdc..729d79ff77fbf6 100644
--- a/clang/lib/Basic/Targets/M68k.h
+++ b/clang/lib/Basic/Targets/M68k.h
@@ -44,8 +44,7 @@ class LLVM_LIBRARY_VISIBILITY M68kTargetInfo : public TargetInfo {
 
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
   bool hasFeature(StringRef Feature) const override;
   ArrayRef<const char *> getGCCRegNames() const override;
   ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
diff --git a/clang/lib/Basic/Targets/MSP430.h b/clang/lib/Basic/Targets/MSP430.h
index a998ea0143b7f0..d7d05f992f4f6b 100644
--- a/clang/lib/Basic/Targets/MSP430.h
+++ b/clang/lib/Basic/Targets/MSP430.h
@@ -50,10 +50,9 @@ class LLVM_LIBRARY_VISIBILITY MSP430TargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
     // FIXME: Implement.
-    return {nullptr, {}};
+    return {};
   }
 
   bool allowsLargerPreferedTypeAlignment() const override { return false; }
diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp
index 1edae69dbdd6a2..f568e0649eed15 100644
--- a/clang/lib/Basic/Targets/Mips.cpp
+++ b/clang/lib/Basic/Targets/Mips.cpp
@@ -230,9 +230,9 @@ bool MipsTargetInfo::hasFeature(StringRef Feature) const {
       .Default(false);
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-MipsTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+MipsTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
 
 unsigned MipsTargetInfo::getUnwindWordWidth() const {
diff --git a/clang/lib/Basic/Targets/Mips.h b/clang/lib/Basic/Targets/Mips.h
index 33a3b39129a923..7de3f1db069730 100644
--- a/clang/lib/Basic/Targets/Mips.h
+++ b/clang/lib/Basic/Targets/Mips.h
@@ -197,8 +197,7 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool hasFeature(StringRef Feature) const override;
 
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index e765ff545b54e0..93e04d3460f0e3 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -302,7 +302,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-NVPTXTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+NVPTXTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index bd029e10039e26..3dd13836a73035 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -74,8 +74,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool useFP16ConversionIntrinsics() const override { return false; }
 
diff --git a/clang/lib/Basic/Targets/PNaCl.h b/clang/lib/Basic/Targets/PNaCl.h
index 89648f980f4eba..d162776b5a0d63 100644
--- a/clang/lib/Basic/Targets/PNaCl.h
+++ b/clang/lib/Basic/Targets/PNaCl.h
@@ -52,9 +52,8 @@ class LLVM_LIBRARY_VISIBILITY PNaClTargetInfo : public TargetInfo {
     return Feature == "pnacl";
   }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index ab96983c3dc30c..2d8891a739ca35 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -934,9 +934,9 @@ void PPCTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
     MaxAtomicInlineWidth = 128;
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-PPCTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+PPCTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
 
 bool PPCTargetInfo::validateCpuSupports(StringRef FeatureStr) const {
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 76f4d152ae5919..db6ac6f0bd3380 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -187,8 +187,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
 
   StringRef getABI() const override { return ABI; }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool isCLZForZeroUndef() const override { return false; }
 
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index efa7a91ad03078..ccd70f0e6e7b2a 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -240,31 +240,44 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
+static constexpr int NumRVVBuiltins =
+    clang::RISCVVector::FirstTSBuiltin - Builtin::FirstTSBuiltin;
+static constexpr int NumRISCVBuiltins =
+    clang::RISCV::LastTSBuiltin - RISCVVector::FirstTSBuiltin;
 static constexpr int NumBuiltins =
     clang::RISCV::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static_assert(NumBuiltins == (NumRVVBuiltins + NumRISCVBuiltins));
 
-static constexpr llvm::StringTable BuiltinStrings =
+static constexpr llvm::StringTable BuiltinRVVStrings =
     CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsRISCVVector.def"
+    ;
+static constexpr llvm::StringTable BuiltinRISCVStrings =
+    CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsRISCV.inc"
     ;
 
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
+static constexpr auto BuiltinRVVInfos = Builtin::MakeInfos<NumRVVBuiltins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsRISCVVector.def"
+});
+static constexpr auto BuiltinRISCVInfos = Builtin::MakeInfos<NumRISCVBuiltins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsRISCV.inc"
 });
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-RISCVTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+RISCVTargetInfo::getTargetBuiltins() const {
+  return {
+      {&BuiltinRVVStrings, BuiltinRVVInfos},
+      {&BuiltinRISCVStrings, BuiltinRISCVInfos},
+  };
 }
 
 bool RISCVTargetInfo::initFeatureMap(
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index c3f62acff2623d..886fdb851f502c 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -62,8 +62,7 @@ class RISCVTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp
index fd755ca056691b..694b0e6cf9c955 100644
--- a/clang/lib/Basic/Targets/SPIR.cpp
+++ b/clang/lib/Basic/Targets/SPIR.cpp
@@ -81,9 +81,9 @@ SPIRV64AMDGCNTargetInfo::convertConstraint(const char *&Constraint) const {
   return AMDGPUTI.convertConstraint(Constraint);
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-SPIRV64AMDGCNTargetInfo::getTargetBuiltinStorage() const {
-  return AMDGPUTI.getTargetBuiltinStorage();
+llvm::SmallVector<Builtin::InfosShard>
+SPIRV64AMDGCNTargetInfo::getTargetBuiltins() const {
+  return AMDGPUTI.getTargetBuiltins();
 }
 
 void SPIRV64AMDGCNTargetInfo::getTargetDefines(const LangOptions &Opts,
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index b1c5c6ea063545..b37e3a09ff5329 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -159,9 +159,8 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
   // memcpy as per section 3 of the SPIR spec.
   bool useFP16ConversionIntrinsics() const override { return false; }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   std::string_view getClobbers() const override { return ""; }
@@ -411,8 +410,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
 
   std::string convertConstraint(const char *&Constraint) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
diff --git a/clang/lib/Basic/Targets/Sparc.h b/clang/lib/Basic/Targets/Sparc.h
index 9836f82a2fc54a..3215e648ba6c31 100644
--- a/clang/lib/Basic/Targets/Sparc.h
+++ b/clang/lib/Basic/Targets/Sparc.h
@@ -48,10 +48,9 @@ class LLVM_LIBRARY_VISIBILITY SparcTargetInfo : public TargetInfo {
 
   bool hasFeature(StringRef Feature) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
     // FIXME: Implement!
-    return {nullptr, {}};
+    return {};
   }
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index be84329bf85d41..26e212220f85ad 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -178,7 +178,7 @@ void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__VEC__", "10304");
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-SystemZTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+SystemZTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 66292c206cbe41..b64ee286131659 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -99,8 +99,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   ArrayRef<const char *> getGCCRegNames() const override;
 
diff --git a/clang/lib/Basic/Targets/TCE.h b/clang/lib/Basic/Targets/TCE.h
index 4f06e013d1dbd4..18c71514fa5ced 100644
--- a/clang/lib/Basic/Targets/TCE.h
+++ b/clang/lib/Basic/Targets/TCE.h
@@ -95,9 +95,8 @@ class LLVM_LIBRARY_VISIBILITY TCETargetInfo : public TargetInfo {
 
   bool hasFeature(StringRef Feature) const override { return Feature == "tce"; }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override {
-    return {nullptr, {}};
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override {
+    return {};
   }
 
   std::string_view getClobbers() const override { return ""; }
diff --git a/clang/lib/Basic/Targets/VE.cpp b/clang/lib/Basic/Targets/VE.cpp
index a955767f46599a..5451f3c303637d 100644
--- a/clang/lib/Basic/Targets/VE.cpp
+++ b/clang/lib/Basic/Targets/VE.cpp
@@ -47,7 +47,6 @@ void VETargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-VETargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard> VETargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
diff --git a/clang/lib/Basic/Targets/VE.h b/clang/lib/Basic/Targets/VE.h
index 69621023acff00..e9b7e92f3f8504 100644
--- a/clang/lib/Basic/Targets/VE.h
+++ b/clang/lib/Basic/Targets/VE.h
@@ -55,8 +55,7 @@ class LLVM_LIBRARY_VISIBILITY VETargetInfo : public TargetInfo {
 
   bool hasSjLjLowering() const override { return true; }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index 4282b1496333ba..f19c57f1a3a502 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -367,9 +367,9 @@ bool WebAssemblyTargetInfo::handleTargetFeatures(
   return true;
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-WebAssemblyTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+WebAssemblyTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
 
 void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags,
diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h
index a67bf5e3733088..83dad2b50ef40f 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -120,8 +120,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo {
 
   bool setCPU(const std::string &Name) final { return isValidCPUName(Name); }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const final;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const final;
 
   BuiltinVaListKind getBuiltinVaListKind() const final {
     return VoidPtrBuiltinVaList;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 4ed8bcec94a5be..9999487c385c50 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -27,33 +27,40 @@ namespace targets {
 static constexpr int NumX86Builtins =
     X86::LastX86CommonBuiltin - Builtin::FirstTSBuiltin + 1;
 static constexpr int NumX86_64Builtins =
-    X86::LastTSBuiltin - Builtin::FirstTSBuiltin;
-static_assert(NumX86Builtins < NumX86_64Builtins);
+    X86::LastTSBuiltin - X86::FirstX86_64Builtin;
+static constexpr int NumBuiltins = X86::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static_assert(NumBuiltins == (NumX86Builtins + NumX86_64Builtins));
 
-static constexpr llvm::StringTable BuiltinStrings =
+static constexpr llvm::StringTable BuiltinX86Strings =
     CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsX86.inc"
+    ;
 
+static constexpr llvm::StringTable BuiltinX86_64Strings =
+    CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsX86_64.inc"
     ;
 
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumX86_64Builtins>({
+static constexpr auto BuiltinX86Infos = Builtin::MakeInfos<NumX86Builtins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsX86.inc"
+});
 
+static constexpr auto BuiltinX86_64Infos =
+    Builtin::MakeInfos<NumX86_64Builtins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsX86_64.inc"
-});
+    });
 
 static const char *const GCCRegNames[] = {
     "ax",    "dx",    "cx",    "bx",    "si",      "di",    "bp",    "sp",
@@ -1870,14 +1877,15 @@ ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const {
   return llvm::ArrayRef(AddlRegNames);
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-X86_32TargetInfo::getTargetBuiltinStorage() const {
-  // Only use the relevant prefix of the infos, the string table base is common.
-  return {&BuiltinStrings,
-          llvm::ArrayRef(BuiltinInfos).take_front(NumX86Builtins)};
+llvm::SmallVector<Builtin::InfosShard>
+X86_32TargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinX86Strings, BuiltinX86Infos}};
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-X86_64TargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+X86_64TargetInfo::getTargetBuiltins() const {
+  return {
+      {&BuiltinX86Strings, BuiltinX86Infos},
+      {&BuiltinX86_64Strings, BuiltinX86_64Infos},
+  };
 }
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 5a2a546b1038bb..fc64bcb844eeeb 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -508,8 +508,7 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo {
       MaxAtomicInlineWidth = 64;
   }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool hasBitIntType() const override { return true; }
   size_t getMaxBitIntWidth() const override {
@@ -813,8 +812,7 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
       MaxAtomicInlineWidth = 128;
   }
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   bool hasBitIntType() const override { return true; }
   size_t getMaxBitIntWidth() const override {
diff --git a/clang/lib/Basic/Targets/XCore.cpp b/clang/lib/Basic/Targets/XCore.cpp
index 334e853a48a12a..c725703ede5b0b 100644
--- a/clang/lib/Basic/Targets/XCore.cpp
+++ b/clang/lib/Basic/Targets/XCore.cpp
@@ -39,7 +39,7 @@ void XCoreTargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__XS1B__");
 }
 
-std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-XCoreTargetInfo::getTargetBuiltinStorage() const {
-  return {&BuiltinStrings, BuiltinInfos};
+llvm::SmallVector<Builtin::InfosShard>
+XCoreTargetInfo::getTargetBuiltins() const {
+  return {{&BuiltinStrings, BuiltinInfos}};
 }
diff --git a/clang/lib/Basic/Targets/XCore.h b/clang/lib/Basic/Targets/XCore.h
index 3f31095fcef297..9af9e0658f629a 100644
--- a/clang/lib/Basic/Targets/XCore.h
+++ b/clang/lib/Basic/Targets/XCore.h
@@ -43,8 +43,7 @@ class LLVM_LIBRARY_VISIBILITY XCoreTargetInfo : public TargetInfo {
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  std::pair<const llvm::StringTable *, ArrayRef<Builtin::Info>>
-  getTargetBuiltinStorage() const override;
+  llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::VoidPtrBuiltinVaList;

>From 7fed20f1f2a2585f39f23eef672628bb7ec28bb2 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Sun, 15 Dec 2024 03:20:15 +0000
Subject: [PATCH 06/10] Switch RISCV to leverage sharded builtins w/ TableGen

This lets the TableGen-ed code be much cleaner, directly building an
efficient string table without duplicates.

The pattern here can then be repeated in other targets.
---
 .../clang/Basic/BuiltinsRISCVVector.def       | 22 -----
 clang/include/clang/Basic/TargetBuiltins.h    |  8 +-
 clang/lib/Basic/Targets/RISCV.cpp             | 59 ++++++++-----
 clang/utils/TableGen/RISCVVEmitter.cpp        | 84 ++++++++++++++-----
 4 files changed, 109 insertions(+), 64 deletions(-)
 delete mode 100644 clang/include/clang/Basic/BuiltinsRISCVVector.def

diff --git a/clang/include/clang/Basic/BuiltinsRISCVVector.def b/clang/include/clang/Basic/BuiltinsRISCVVector.def
deleted file mode 100644
index 6dfa87a1a1d313..00000000000000
--- a/clang/include/clang/Basic/BuiltinsRISCVVector.def
+++ /dev/null
@@ -1,22 +0,0 @@
-//==- BuiltinsRISCVVector.def - RISC-V Vector Builtin Database ---*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the RISC-V-specific builtin function database.  Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-//===----------------------------------------------------------------------===//
-
-#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
-#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-#include "clang/Basic/riscv_vector_builtins.inc"
-#include "clang/Basic/riscv_sifive_vector_builtins.inc"
-
-#undef BUILTIN
-#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index f34e4241b26842..370f1f7cb1497f 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -146,8 +146,12 @@ namespace clang {
   namespace RISCVVector {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-#include "clang/Basic/BuiltinsRISCVVector.def"
+#define GET_RISCVV_BUILTIN_ENUMERATORS
+#include "clang/Basic/riscv_vector_builtins.inc"
+    FirstSiFiveBuiltin,
+    LastRVVBuiltin = FirstSiFiveBuiltin - 1,
+#include "clang/Basic/riscv_sifive_vector_builtins.inc"
+#undef GET_RISCVV_BUILTIN_ENUMERATORS
     FirstTSBuiltin,
   };
   }
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index ccd70f0e6e7b2a..0e78471d6f4d7b 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -241,32 +241,50 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
 }
 
 static constexpr int NumRVVBuiltins =
-    clang::RISCVVector::FirstTSBuiltin - Builtin::FirstTSBuiltin;
+    RISCVVector::FirstSiFiveBuiltin - Builtin::FirstTSBuiltin;
+static constexpr int NumRVVSiFiveBuiltins =
+    RISCVVector::FirstTSBuiltin - RISCVVector::FirstSiFiveBuiltin;
 static constexpr int NumRISCVBuiltins =
-    clang::RISCV::LastTSBuiltin - RISCVVector::FirstTSBuiltin;
+    RISCV::LastTSBuiltin - RISCVVector::FirstTSBuiltin;
 static constexpr int NumBuiltins =
-    clang::RISCV::LastTSBuiltin - Builtin::FirstTSBuiltin;
-static_assert(NumBuiltins == (NumRVVBuiltins + NumRISCVBuiltins));
-
-static constexpr llvm::StringTable BuiltinRVVStrings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsRISCVVector.def"
-    ;
-static constexpr llvm::StringTable BuiltinRISCVStrings =
+    RISCV::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static_assert(NumBuiltins ==
+              (NumRVVBuiltins + NumRVVSiFiveBuiltins + NumRISCVBuiltins));
+
+namespace RVV {
+#define GET_RISCVV_BUILTIN_STR_TABLE
+#include "clang/Basic/riscv_vector_builtins.inc"
+#undef GET_RISCVV_BUILTIN_STR_TABLE
+static_assert(BuiltinStrings.size() < 100'000);
+
+static constexpr std::array<Builtin::Info, NumRVVBuiltins> BuiltinInfos = {
+#define GET_RISCVV_BUILTIN_INFOS
+#include "clang/Basic/riscv_vector_builtins.inc"
+#undef GET_RISCVV_BUILTIN_INFOS
+};
+} // namespace RVV
+
+namespace RVVSiFive {
+#define GET_RISCVV_BUILTIN_STR_TABLE
+#include "clang/Basic/riscv_sifive_vector_builtins.inc"
+#undef GET_RISCVV_BUILTIN_STR_TABLE
+
+static constexpr std::array<Builtin::Info, NumRVVSiFiveBuiltins> BuiltinInfos =
+    {
+#define GET_RISCVV_BUILTIN_INFOS
+#include "clang/Basic/riscv_sifive_vector_builtins.inc"
+#undef GET_RISCVV_BUILTIN_INFOS
+};
+} // namespace RVVSiFive
+
+static constexpr llvm::StringTable BuiltinStrings =
     CLANG_BUILTIN_STR_TABLE_START
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsRISCV.inc"
     ;
 
-static constexpr auto BuiltinRVVInfos = Builtin::MakeInfos<NumRVVBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#include "clang/Basic/BuiltinsRISCVVector.def"
-});
-static constexpr auto BuiltinRISCVInfos = Builtin::MakeInfos<NumRISCVBuiltins>({
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumRISCVBuiltins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
 #include "clang/Basic/BuiltinsRISCV.inc"
@@ -275,8 +293,9 @@ static constexpr auto BuiltinRISCVInfos = Builtin::MakeInfos<NumRISCVBuiltins>({
 llvm::SmallVector<Builtin::InfosShard>
 RISCVTargetInfo::getTargetBuiltins() const {
   return {
-      {&BuiltinRVVStrings, BuiltinRVVInfos},
-      {&BuiltinRISCVStrings, BuiltinRISCVInfos},
+      {&RVV::BuiltinStrings, RVV::BuiltinInfos},
+      {&RVVSiFive::BuiltinStrings, RVVSiFive::BuiltinInfos},
+      {&BuiltinStrings, BuiltinInfos},
   };
 }
 
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index acba1a31912816..077854f71d236d 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -18,10 +18,12 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringToOffsetTable.h"
 #include <optional>
 
 using namespace llvm;
@@ -498,31 +500,73 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
   std::vector<std::unique_ptr<RVVIntrinsic>> Defs;
   createRVVIntrinsics(Defs);
 
-  // Map to keep track of which builtin names have already been emitted.
-  StringMap<RVVIntrinsic *> BuiltinMap;
+  llvm::StringToOffsetTable Table;
+  // Ensure offset zero is the empty string.
+  Table.GetOrAddStringOffset("");
+  // Hard coded strings used in the builtin structures.
+  Table.GetOrAddStringOffset("n");
+  Table.GetOrAddStringOffset("zve32x");
 
-  OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n";
-  OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, "
-        "ATTRS, \"zve32x\")\n";
-  OS << "#endif\n";
+  auto PrefixName = [](RVVIntrinsic *Def) -> std::string {
+    return ("__builtin_rvv_" + Def->getBuiltinName()).str();
+  };
+
+  // Map to unique the builtin names.
+  StringMap<RVVIntrinsic *> BuiltinMap;
+  std::vector<RVVIntrinsic *> UniqueDefs;
   for (auto &Def : Defs) {
-    auto P =
-        BuiltinMap.insert(std::make_pair(Def->getBuiltinName(), Def.get()));
-    if (!P.second) {
-      // Verf that this would have produced the same builtin definition.
-      if (P.first->second->hasBuiltinAlias() != Def->hasBuiltinAlias())
-        PrintFatalError("Builtin with same name has different hasAutoDef");
-      else if (!Def->hasBuiltinAlias() &&
-               P.first->second->getBuiltinTypeStr() != Def->getBuiltinTypeStr())
-        PrintFatalError("Builtin with same name has different type string");
+    auto P = BuiltinMap.insert({Def->getBuiltinName(), Def.get()});
+    if (P.second) {
+      Table.GetOrAddStringOffset(PrefixName(Def.get()));
+      if (!Def->hasBuiltinAlias())
+        Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
+      UniqueDefs.push_back(Def.get());
       continue;
     }
-    OS << "RISCVV_BUILTIN(__builtin_rvv_" << Def->getBuiltinName() << ",\"";
-    if (!Def->hasBuiltinAlias())
-      OS << Def->getBuiltinTypeStr();
-    OS << "\", \"n\")\n";
+
+    // Verify that this would have produced the same builtin definition.
+    if (P.first->second->hasBuiltinAlias() != Def->hasBuiltinAlias())
+      PrintFatalError("Builtin with same name has different hasAutoDef");
+    else if (!Def->hasBuiltinAlias() &&
+             P.first->second->getBuiltinTypeStr() != Def->getBuiltinTypeStr())
+      PrintFatalError("Builtin with same name has different type string");
+  }
+
+  // Emit the enumerators of RVV builtins. Note that these are emitted without
+  // any outer context to enable concatenating them.
+  OS << "// RISCV Vector builtin enumerators\n";
+  OS << "#ifdef GET_RISCVV_BUILTIN_ENUMERATORS\n";
+  for (RVVIntrinsic *Def : UniqueDefs)
+    OS << "  BI__builtin_rvv_" << Def->getBuiltinName() << ",\n";
+  OS << "#endif // GET_RISCVV_BUILTIN_ENUMERATORS\n\n";
+
+  // Emit the string table for the RVV builtins.
+  OS << "// RISCV Vector builtin enumerators\n";
+  OS << "#ifdef GET_RISCVV_BUILTIN_STR_TABLE\n";
+  Table.EmitStringLiteralDef(
+      OS, "static constexpr llvm::StringTable BuiltinStrings", "");
+  OS << "#endif // GET_RISCVV_BUILTIN_STR_TABLE\n\n";
+
+  // Emit the info structs of RVV builtins. Note that these are emitted without
+  // any outer context to enable concatenating them.
+  OS << "// RISCV Vector builtin infos\n";
+  OS << "#ifdef GET_RISCVV_BUILTIN_INFOS\n";
+  for (RVVIntrinsic *Def : UniqueDefs) {
+    OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
+       << Table.GetStringOffset(PrefixName(Def)) << " /* " << PrefixName(Def)
+       << " */, ";
+    if (Def->hasBuiltinAlias()) {
+      OS << "0, ";
+    } else {
+      OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
+         << Def->getBuiltinTypeStr() << " */, ";
+    }
+    OS << Table.GetStringOffset("n") << " /* n */, ";
+    OS << Table.GetStringOffset("zve32x") << " /* zve32x */}, ";
+
+    OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
   }
-  OS << "#undef RISCVV_BUILTIN\n";
+  OS << "#endif // GET_RISCVV_BUILTIN_INFOS\n\n";
 }
 
 void RVVEmitter::createCodeGen(raw_ostream &OS) {

>From f7d43517362f44dad4b2c2e7ff289aa0a63ca863 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Mon, 16 Dec 2024 20:27:41 +0000
Subject: [PATCH 07/10] Switch AArch64 and ARM to use directly TableGen-ed
 builtin tables

This leverages the sharded structure of the builtins to make it easy to
directly tablegen most of the AArch64 and ARM builtins while still using
X-macros for a few edge cases. This in turn shrinks the largest string
table by a factor of two.
---
 clang/include/clang/Basic/BuiltinsNEON.def |  22 ----
 clang/include/clang/Basic/TargetBuiltins.h |  26 +++--
 clang/lib/Basic/Targets/AArch64.cpp        | 114 +++++++++++++------
 clang/lib/Basic/Targets/ARM.cpp            |  54 ++++++---
 clang/lib/Sema/SemaARM.cpp                 |  16 +--
 clang/utils/TableGen/NeonEmitter.cpp       |  61 ++++++----
 clang/utils/TableGen/SveEmitter.cpp        | 126 +++++++++++++++++----
 7 files changed, 284 insertions(+), 135 deletions(-)
 delete mode 100644 clang/include/clang/Basic/BuiltinsNEON.def

diff --git a/clang/include/clang/Basic/BuiltinsNEON.def b/clang/include/clang/Basic/BuiltinsNEON.def
deleted file mode 100644
index 9627005ba9824e..00000000000000
--- a/clang/include/clang/Basic/BuiltinsNEON.def
+++ /dev/null
@@ -1,22 +0,0 @@
-//===--- BuiltinsNEON.def - NEON Builtin function database ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the NEON-specific builtin function database.  Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-//===----------------------------------------------------------------------===//
-
-// The format of this database matches clang/Basic/Builtins.def.
-
-#define GET_NEON_BUILTINS
-#include "clang/Basic/arm_neon.inc"
-#include "clang/Basic/arm_fp16.inc"
-#undef GET_NEON_BUILTINS
-
-#undef BUILTIN
-#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 370f1f7cb1497f..f066f902a18c02 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -26,9 +26,12 @@ namespace clang {
   namespace NEON {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
-#include "clang/Basic/BuiltinsNEON.def"
+#define GET_NEON_BUILTIN_ENUMERATORS
+#include "clang/Basic/arm_neon.inc"
+    FirstFp16Builtin,
+    LastNeonBuiltin = FirstFp16Builtin - 1,
+#include "clang/Basic/arm_fp16.inc"
+#undef GET_NEON_BUILTIN_ENUMERATORS
     FirstTSBuiltin
   };
   }
@@ -47,9 +50,16 @@ namespace clang {
   namespace SVE {
   enum {
     LastNEONBuiltin = NEON::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#define GET_SVE_BUILTIN_ENUMERATORS
+#include "clang/Basic/arm_sve_builtins.inc"
+#undef GET_SVE_BUILTIN_ENUMERATORS
+    FirstNeonBridgeBuiltin,
+    LastSveBuiltin = FirstNeonBridgeBuiltin - 1,
+#define GET_SVE_BUILTINS
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
-#include "clang/Basic/BuiltinsSVE.def"
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
+#undef TARGET_BUILTIN
+#undef GET_SVE_BUILTINS
     FirstTSBuiltin,
   };
   }
@@ -57,9 +67,9 @@ namespace clang {
   namespace SME {
   enum {
     LastSVEBuiltin = SVE::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
-#include "clang/Basic/BuiltinsSME.def"
+#define GET_SME_BUILTIN_ENUMERATORS
+#include "clang/Basic/arm_sme_builtins.inc"
+#undef GET_SME_BUILTIN_ENUMERATORS
     FirstTSBuiltin,
   };
   }
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 04697deb50be79..a23cdc5734e49d 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -26,35 +26,80 @@
 using namespace clang;
 using namespace clang::targets;
 
-static constexpr int NumNEONBuiltins =
-    NEON::FirstTSBuiltin - Builtin::FirstTSBuiltin;
+static constexpr int NumNeonBuiltins =
+    NEON::FirstFp16Builtin - Builtin::FirstTSBuiltin;
+static constexpr int NumFp16Builtins =
+    NEON::FirstTSBuiltin - NEON::FirstFp16Builtin;
 static constexpr int NumSVEBuiltins =
-    SVE::FirstTSBuiltin - NEON::FirstTSBuiltin;
+    SVE::FirstNeonBridgeBuiltin - NEON::FirstTSBuiltin;
+static constexpr int NumSVENeonBridgeBuiltins =
+    SVE::FirstTSBuiltin - SVE::FirstNeonBridgeBuiltin;
 static constexpr int NumSMEBuiltins = SME::FirstTSBuiltin - SVE::FirstTSBuiltin;
 static constexpr int NumAArch64Builtins =
     AArch64::LastTSBuiltin - SME::FirstTSBuiltin;
 static constexpr int NumBuiltins =
     AArch64::LastTSBuiltin - Builtin::FirstTSBuiltin;
-static_assert(NumBuiltins == (NumNEONBuiltins + NumSVEBuiltins +
-                              NumSMEBuiltins + NumAArch64Builtins));
+static_assert(NumBuiltins ==
+              (NumNeonBuiltins + NumFp16Builtins + NumSVEBuiltins +
+               NumSVENeonBridgeBuiltins + NumSMEBuiltins + NumAArch64Builtins));
+
+namespace clang {
+namespace NEON {
+#define GET_NEON_BUILTIN_STR_TABLE
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_BUILTIN_STR_TABLE
+
+static constexpr std::array<Builtin::Info, NumNeonBuiltins> BuiltinInfos = {
+#define GET_NEON_BUILTIN_INFOS
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_BUILTIN_INFOS
+};
 
-static constexpr llvm::StringTable BuiltinNEONStrings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsNEON.def"
-    ;
-static constexpr llvm::StringTable BuiltinSVEStrings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsSVE.def"
-    ;
-static constexpr llvm::StringTable BuiltinSMEStrings =
+namespace FP16 {
+#define GET_NEON_BUILTIN_STR_TABLE
+#include "clang/Basic/arm_fp16.inc"
+#undef GET_NEON_BUILTIN_STR_TABLE
+
+static constexpr std::array<Builtin::Info, NumFp16Builtins> BuiltinInfos = {
+#define GET_NEON_BUILTIN_INFOS
+#include "clang/Basic/arm_fp16.inc"
+#undef GET_NEON_BUILTIN_INFOS
+};
+} // namespace FP16
+} // namespace NEON
+
+namespace SVE {
+#define GET_SVE_BUILTIN_STR_TABLE
+#include "clang/Basic/arm_sve_builtins.inc"
+#undef GET_SVE_BUILTIN_STR_TABLE
+
+static constexpr std::array<Builtin::Info, NumSVEBuiltins> BuiltinInfos = {
+#define GET_SVE_BUILTIN_INFOS
+#include "clang/Basic/arm_sve_builtins.inc"
+#undef GET_SVE_BUILTIN_INFOS
+};
+} // namespace SVE
+
+namespace SME {
+#define GET_SME_BUILTIN_STR_TABLE
+#include "clang/Basic/arm_sme_builtins.inc"
+#undef GET_SME_BUILTIN_STR_TABLE
+
+static constexpr std::array<Builtin::Info, NumSMEBuiltins> BuiltinInfos = {
+#define GET_SME_BUILTIN_INFOS
+#include "clang/Basic/arm_sme_builtins.inc"
+#undef GET_SME_BUILTIN_INFOS
+};
+} // namespace SME
+} // namespace clang
+
+static constexpr llvm::StringTable BuiltinSVENeonBridgeStrings =
     CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsSME.def"
+#define GET_SVE_BUILTINS
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
+#undef GET_SVE_BUILTINS
+#undef TARGET_BUILTIN
     ;
 static constexpr llvm::StringTable BuiltinAArch64Strings =
     CLANG_BUILTIN_STR_TABLE_START
@@ -64,21 +109,14 @@ static constexpr llvm::StringTable BuiltinAArch64Strings =
 #include "clang/Basic/BuiltinsAArch64.def"
     ;
 
-static constexpr auto BuiltinNEONInfos = Builtin::MakeInfos<NumNEONBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
+static constexpr auto BuiltinSVENeonBridgeInfos =
+    Builtin::MakeInfos<NumSVENeonBridgeBuiltins>({
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#include "clang/Basic/BuiltinsNEON.def"
-});
-static constexpr auto BuiltinSVEInfos = Builtin::MakeInfos<NumSVEBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#include "clang/Basic/BuiltinsSVE.def"
-});
-static constexpr auto BuiltinSMEInfos = Builtin::MakeInfos<NumSMEBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#include "clang/Basic/BuiltinsSME.def"
-});
+#define GET_SVE_BUILTINS
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
+#undef GET_SVE_BUILTINS
+#undef TARGET_BUILTIN
+    });
 static constexpr auto BuiltinAArch64Infos =
     Builtin::MakeInfos<NumAArch64Builtins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
@@ -732,9 +770,11 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
 llvm::SmallVector<Builtin::InfosShard>
 AArch64TargetInfo::getTargetBuiltins() const {
   return {
-      {&BuiltinNEONStrings, BuiltinNEONInfos},
-      {&BuiltinSVEStrings, BuiltinSVEInfos},
-      {&BuiltinSMEStrings, BuiltinSMEInfos},
+      {&NEON::BuiltinStrings, NEON::BuiltinInfos},
+      {&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos},
+      {&SVE::BuiltinStrings, SVE::BuiltinInfos},
+      {&BuiltinSVENeonBridgeStrings, BuiltinSVENeonBridgeInfos},
+      {&SME::BuiltinStrings, SME::BuiltinInfos},
       {&BuiltinAArch64Strings, BuiltinAArch64Infos},
   };
 }
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index d111443d04409a..92428908fe4a61 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1071,26 +1071,50 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 }
 
-static constexpr int NumBuiltins =
-    clang::ARM::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static constexpr int NumBuiltins = ARM::LastTSBuiltin - Builtin::FirstTSBuiltin;
+static constexpr int NumNeonBuiltins =
+    NEON::FirstFp16Builtin - Builtin::FirstTSBuiltin;
+static constexpr int NumFp16Builtins =
+    NEON::FirstTSBuiltin - NEON::FirstFp16Builtin;
+static constexpr int NumARMBuiltins = ARM::LastTSBuiltin - NEON::FirstTSBuiltin;
+static_assert(NumBuiltins ==
+              (NumNeonBuiltins + NumFp16Builtins + NumARMBuiltins));
+
+namespace clang {
+namespace NEON {
+#define GET_NEON_BUILTIN_STR_TABLE
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_BUILTIN_STR_TABLE
+
+static constexpr std::array<Builtin::Info, NumNeonBuiltins> BuiltinInfos = {
+#define GET_NEON_BUILTIN_INFOS
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_BUILTIN_INFOS
+};
+
+namespace FP16 {
+#define GET_NEON_BUILTIN_STR_TABLE
+#include "clang/Basic/arm_fp16.inc"
+#undef GET_NEON_BUILTIN_STR_TABLE
+
+static constexpr std::array<Builtin::Info, NumFp16Builtins> BuiltinInfos = {
+#define GET_NEON_BUILTIN_INFOS
+#include "clang/Basic/arm_fp16.inc"
+#undef GET_NEON_BUILTIN_INFOS
+};
+} // namespace FP16
+} // namespace NEON
+} // namespace clang
 
 static constexpr llvm::StringTable BuiltinStrings =
     CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsNEON.def"
-
 #define BUILTIN CLANG_BUILTIN_STR_TABLE
 #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
 #define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsARM.def"
-    ;
+    ; // namespace clang
 
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#include "clang/Basic/BuiltinsNEON.def"
+static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumARMBuiltins>({
 #define BUILTIN CLANG_BUILTIN_ENTRY
 #define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
 #define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
@@ -1101,7 +1125,11 @@ static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
 
 llvm::SmallVector<Builtin::InfosShard>
 ARMTargetInfo::getTargetBuiltins() const {
-  return {{&BuiltinStrings, BuiltinInfos}};
+  return {
+      {&NEON::BuiltinStrings, NEON::BuiltinInfos},
+      {&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos},
+      {&BuiltinStrings, BuiltinInfos},
+  };
 }
 
 bool ARMTargetInfo::isCLZForZeroUndef() const { return false; }
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 3e93b38143f3b3..16a80be5e15467 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -718,22 +718,18 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
                                            unsigned BuiltinID,
                                            CallExpr *TheCall) {
   if (const FunctionDecl *FD = SemaRef.getCurFunctionDecl()) {
+    std::optional<ArmStreamingType> BuiltinType;
 
     switch (BuiltinID) {
     default:
       break;
-#define GET_NEON_BUILTINS
-#define TARGET_BUILTIN(id, ...) case NEON::BI##id:
-#define BUILTIN(id, ...) case NEON::BI##id:
+#define GET_NEON_STREAMING_COMPAT_FLAG
 #include "clang/Basic/arm_neon.inc"
-      if (checkArmStreamingBuiltin(SemaRef, TheCall, FD, ArmNonStreaming,
-                                   BuiltinID))
-        return true;
-      break;
-#undef TARGET_BUILTIN
-#undef BUILTIN
-#undef GET_NEON_BUILTINS
+#undef GET_NEON_STREAMING_COMPAT_FLAG
     }
+    if (BuiltinType &&
+        checkArmStreamingBuiltin(SemaRef, TheCall, FD, *BuiltinType, BuiltinID))
+      return true;
   }
 
   llvm::APSInt Result;
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index d7d649dd2456d5..e371fa9e8c8a0c 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -37,6 +37,7 @@
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/SetTheory.h"
+#include "llvm/TableGen/StringToOffsetTable.h"
 #include <algorithm>
 #include <cassert>
 #include <cctype>
@@ -2056,40 +2057,56 @@ void NeonEmitter::createIntrinsic(const Record *R,
   CurrentRecord = nullptr;
 }
 
-/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
-/// declaration of builtins, checking for unique builtin declarations.
+/// genBuiltinsDef: Generate the builtin infos, checking for unique builtin
+/// declarations.
 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                  SmallVectorImpl<Intrinsic *> &Defs) {
-  OS << "#ifdef GET_NEON_BUILTINS\n";
+  // We only want to emit a builtin once, and in order of its name.
+  std::map<std::string, Intrinsic *> Builtins;
 
-  // We only want to emit a builtin once, and we want to emit them in
-  // alphabetical order, so use a std::set.
-  std::set<std::pair<std::string, std::string>> Builtins;
+  llvm::StringToOffsetTable Table;
+  Table.GetOrAddStringOffset("");
+  Table.GetOrAddStringOffset("n");
+
+  auto PrefixName = [](Intrinsic *Def) -> std::string {
+    return (llvm::Twine("__builtin_neon_") + Def->getMangledName()).str();
+  };
 
   for (auto *Def : Defs) {
     if (Def->hasBody())
       continue;
 
-    std::string S = "__builtin_neon_" + Def->getMangledName() + ", \"";
-    S += Def->getBuiltinTypeStr();
-    S += "\", \"n\"";
-
-    Builtins.emplace(S, Def->getTargetGuard());
+    if (Builtins.insert({Def->getMangledName(), Def}).second) {
+      Table.GetOrAddStringOffset(PrefixName(Def));
+      Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
+      Table.GetOrAddStringOffset(Def->getTargetGuard());
+    }
   }
 
-  for (auto &S : Builtins) {
-    if (S.second == "")
-      OS << "BUILTIN(";
-    else
-      OS << "TARGET_BUILTIN(";
-    OS << S.first;
-    if (S.second == "")
-      OS << ")\n";
-    else
-      OS << ", \"" << S.second << "\")\n";
+  OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n";
+  for (const auto &[Name, Def] : Builtins) {
+    OS << "  BI__builtin_neon_" << Name << ",\n";
   }
+  OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n";
 
-  OS << "#endif\n\n";
+  OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n";
+  Table.EmitStringLiteralDef(
+      OS, "static constexpr llvm::StringTable BuiltinStrings", "");
+  OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n";
+
+  OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
+  for (const auto &[Name, Def] : Builtins) {
+    OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
+       << Table.GetStringOffset(PrefixName(Def)) << " /* " << PrefixName(Def)
+       << " */, ";
+    OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
+       << Def->getBuiltinTypeStr() << " */, ";
+    OS << Table.GetStringOffset("n") << " /* n */, ";
+    OS << Table.GetStringOffset(Def->getTargetGuard()) << " /* "
+       << Def->getTargetGuard() << " */}, ";
+    OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
+  }
+  OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n";
 }
 
 void NeonEmitter::genStreamingSVECompatibleList(
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index cf7e5a1ee3e008..c9cbbb73580c00 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -27,9 +27,11 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/AArch64ImmCheck.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringToOffsetTable.h"
 #include <array>
 #include <cctype>
 #include <set>
@@ -198,7 +200,9 @@ class Intrinsic {
 
   StringRef getSVEGuard() const { return SVEGuard; }
   StringRef getSMEGuard() const { return SMEGuard; }
-  void printGuard(raw_ostream &OS) const {
+  std::string getGuard() const {
+    std::string Guard;
+    llvm::raw_string_ostream OS(Guard);
     if (!SVEGuard.empty() && SMEGuard.empty())
       OS << SVEGuard;
     else if (SVEGuard.empty() && !SMEGuard.empty())
@@ -216,6 +220,7 @@ class Intrinsic {
       else
         OS << SMEGuard;
     }
+    return Guard;
   }
   ClassKind getClassKind() const { return Class; }
 
@@ -1471,19 +1476,23 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) {
     return A->getMangledName() < B->getMangledName();
   });
 
-  OS << "#ifdef GET_SVE_BUILTINS\n";
-  for (auto &Def : Defs) {
-    // Only create BUILTINs for non-overloaded intrinsics, as overloaded
-    // declarations only live in the header file.
+  llvm::StringToOffsetTable Table;
+  Table.GetOrAddStringOffset("");
+  Table.GetOrAddStringOffset("n");
+
+  auto PrefixName = [](Intrinsic *Def) -> std::string {
+    return (llvm::Twine("__builtin_sve_") + Def->getMangledName()).str();
+  };
+
+  for (const auto &Def : Defs)
     if (Def->getClassKind() != ClassG) {
-      OS << "TARGET_BUILTIN(__builtin_sve_" << Def->getMangledName() << ", \""
-         << Def->getBuiltinTypeStr() << "\", \"n\", \"";
-      Def->printGuard(OS);
-      OS << "\")\n";
+      Table.GetOrAddStringOffset(PrefixName(Def.get()));
+      Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
+      Table.GetOrAddStringOffset(Def->getGuard());
     }
-  }
 
-  // Add reinterpret functions.
+  Table.GetOrAddStringOffset("sme|sve");
+  SmallVector<std::pair<std::string, std::string>> ReinterpretBuiltins;
   for (auto [N, Suffix] :
        std::initializer_list<std::pair<unsigned, const char *>>{
            {1, ""}, {2, "_x2"}, {3, "_x3"}, {4, "_x4"}}) {
@@ -1491,14 +1500,55 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) {
       SVEType ToV(To.BaseType, N);
       for (const ReinterpretTypeInfo &From : Reinterprets) {
         SVEType FromV(From.BaseType, N);
-        OS << "TARGET_BUILTIN(__builtin_sve_reinterpret_" << To.Suffix << "_"
-           << From.Suffix << Suffix << +", \"" << ToV.builtin_str()
-           << FromV.builtin_str() << "\", \"n\", \"sme|sve\")\n";
+        std::string Name = (Twine("__builtin_sve_reinterpret_") + To.Suffix +
+                            "_" + From.Suffix + Suffix)
+                               .str();
+        std::string Type = ToV.builtin_str() + FromV.builtin_str();
+        Table.GetOrAddStringOffset(Name);
+        Table.GetOrAddStringOffset(Type);
+        ReinterpretBuiltins.push_back({Name, Type});
       }
     }
   }
 
-  OS << "#endif\n\n";
+  OS << "#ifdef GET_SVE_BUILTIN_ENUMERATORS\n";
+  for (const auto &Def : Defs)
+    if (Def->getClassKind() != ClassG)
+      OS << "  BI" << PrefixName(Def.get()) << ",\n";
+  for (const auto &[Name, _] : ReinterpretBuiltins)
+    OS << "  BI" << Name << ",\n";
+  OS << "#endif // GET_SVE_BUILTIN_ENUMERATORS\n\n";
+
+  OS << "#ifdef GET_SVE_BUILTIN_STR_TABLE\n";
+  Table.EmitStringLiteralDef(
+      OS, "static constexpr llvm::StringTable BuiltinStrings", "");
+  OS << "#endif // GET_SVE_BUILTIN_STR_TABLE\n\n";
+
+  OS << "#ifdef GET_SVE_BUILTIN_INFOS\n";
+  for (const auto &Def : Defs) {
+    // Only create BUILTINs for non-overloaded intrinsics, as overloaded
+    // declarations only live in the header file.
+    if (Def->getClassKind() != ClassG) {
+      OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
+         << Table.GetStringOffset(PrefixName(Def.get())) << " /* "
+         << PrefixName(Def.get()) << " */, ";
+      OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
+         << Def->getBuiltinTypeStr() << " */, ";
+      OS << Table.GetStringOffset("n") << " /* n */, ";
+      OS << Table.GetStringOffset(Def->getGuard()) << " /* " << Def->getGuard()
+         << " */}, ";
+      OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
+    }
+  }
+  for (const auto &[Name, Type] : ReinterpretBuiltins) {
+    OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
+       << Table.GetStringOffset(Name) << " /* " << Name << " */, ";
+    OS << Table.GetStringOffset(Type) << " /* " << Type << " */, ";
+    OS << Table.GetStringOffset("n") << " /* n */, ";
+    OS << Table.GetStringOffset("sme|sve") << " /* sme|sve */}, ";
+    OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
+  }
+  OS << "#endif // GET_SVE_BUILTIN_INFOS\n\n";
 }
 
 void SVEEmitter::createCodeGenMap(raw_ostream &OS) {
@@ -1678,19 +1728,49 @@ void SVEEmitter::createSMEBuiltins(raw_ostream &OS) {
     return A->getMangledName() < B->getMangledName();
   });
 
-  OS << "#ifdef GET_SME_BUILTINS\n";
-  for (auto &Def : Defs) {
+  llvm::StringToOffsetTable Table;
+  Table.GetOrAddStringOffset("");
+  Table.GetOrAddStringOffset("n");
+
+  auto PrefixName = [](Intrinsic *Def) -> std::string {
+    return (llvm::Twine("__builtin_sme_") + Def->getMangledName()).str();
+  };
+
+  for (const auto &Def : Defs)
+    if (Def->getClassKind() != ClassG) {
+      Table.GetOrAddStringOffset(PrefixName(Def.get()));
+      Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
+      Table.GetOrAddStringOffset(Def->getGuard());
+    }
+
+  OS << "#ifdef GET_SME_BUILTIN_ENUMERATORS\n";
+  for (const auto &Def : Defs)
+    if (Def->getClassKind() != ClassG)
+      OS << "  BI" << PrefixName(Def.get()) << ",\n";
+  OS << "#endif // GET_SME_BUILTIN_ENUMERATORS\n\n";
+
+  OS << "#ifdef GET_SME_BUILTIN_STR_TABLE\n";
+  Table.EmitStringLiteralDef(
+      OS, "static constexpr llvm::StringTable BuiltinStrings", "");
+  OS << "#endif // GET_SME_BUILTIN_STR_TABLE\n\n";
+
+  OS << "#ifdef GET_SME_BUILTIN_INFOS\n";
+  for (const auto &Def : Defs) {
     // Only create BUILTINs for non-overloaded intrinsics, as overloaded
     // declarations only live in the header file.
     if (Def->getClassKind() != ClassG) {
-      OS << "TARGET_BUILTIN(__builtin_sme_" << Def->getMangledName() << ", \""
-         << Def->getBuiltinTypeStr() << "\", \"n\", \"";
-      Def->printGuard(OS);
-      OS << "\")\n";
+      OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
+         << Table.GetStringOffset(PrefixName(Def.get())) << " /* "
+         << PrefixName(Def.get()) << " */, ";
+      OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
+         << Def->getBuiltinTypeStr() << " */, ";
+      OS << Table.GetStringOffset("n") << " /* n */, ";
+      OS << Table.GetStringOffset(Def->getGuard()) << " /* " << Def->getGuard()
+         << " */}, ";
+      OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
     }
   }
-
-  OS << "#endif\n\n";
+  OS << "#endif // GET_SME_BUILTIN_INFOS\n\n";
 }
 
 void SVEEmitter::createSMECodeGenMap(raw_ostream &OS) {

>From d965d9b44872bcad3b9471ad6a6cc0a1a54fff2a Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Sat, 21 Dec 2024 23:42:57 +0000
Subject: [PATCH 08/10] Remove the `CustomEntry` escape hatch from builtin
 TableGen

This was an especially challenging escape hatch because it directly
forced the use of a specific X-macro structure and prevented any other
form of TableGen emission.

The problematic feature that motivated this is a case where a builtin's
prototype can't be represented in the mini-language used by TableGen.
Instead of adding a complete custom entry for this, this PR just teaches
the prototype handling to do the same thing the X-macros did in this
case: emit an empty string and let the Clang builtin handling respond
appropriately.

This should produce identical results while preserving all the rest of
the structured representation in the builtin TableGen code.
---
 clang/include/clang/Basic/Builtins.td         |  8 ++++---
 clang/include/clang/Basic/BuiltinsBase.td     | 13 +++++++----
 clang/utils/TableGen/ClangBuiltinsEmitter.cpp | 22 ++++++++++++++-----
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index d64a66fc9d9cf7..9f5647dc719f33 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3347,10 +3347,12 @@ def VFork : LibBuiltin<"unistd.h"> {
 }
 
 // POSIX pthread.h
-// FIXME: This should be a GNULibBuiltin, but it's currently missing the prototype.
 
-def PthreadCreate : CustomEntry {
-  let Entry = "LIBBUILTIN(pthread_create, \"\",  \"fC<2,3>\", PTHREAD_H, ALL_GNU_LANGUAGES)";
+def PthreadCreate : GNULibBuiltin<"pthread.h"> {
+  let Spellings = ["pthread_create"];
+  let Attributes = [FunctionWithoutBuiltinPrefix, Callback<[2, 3]>];
+  // Note that we don't have an expressible prototype so we leave it empty.
+  let Prototype = "";
 }
 
 def SigSetJmp : LibBuiltin<"setjmp.h"> {
diff --git a/clang/include/clang/Basic/BuiltinsBase.td b/clang/include/clang/Basic/BuiltinsBase.td
index afed3c815d3290..1cffd4aaa6f646 100644
--- a/clang/include/clang/Basic/BuiltinsBase.td
+++ b/clang/include/clang/Basic/BuiltinsBase.td
@@ -17,6 +17,11 @@ class IndexedAttribute<string baseMangling, int I> : Attribute<baseMangling> {
   int Index = I;
 }
 
+class MultiIndexAttribute<string baseMangling, list<int> Is>
+    : Attribute<baseMangling> {
+  list<int> Indices = Is;
+}
+
 // Standard Attributes
 // -------------------
 def NoReturn : Attribute<"r">;
@@ -77,6 +82,10 @@ def Constexpr : Attribute<"E">;
 // Builtin is immediate and must be constant evaluated. Implies Constexpr, and will only be supported in C++20 mode.
 def Consteval : Attribute<"EG">;
 
+// Callback behavior: the first index argument is called with the arguments
+// indicated by the remaining indices.
+class Callback<list<int> ArgIndices> : MultiIndexAttribute<"C", ArgIndices>;
+
 // Builtin kinds
 // =============
 
@@ -90,10 +99,6 @@ class Builtin {
   bit RequiresUndef = 0;
 }
 
-class CustomEntry {
-  string Entry;
-}
-
 class AtomicBuiltin : Builtin;
 
 class LibBuiltin<string header, string languages = "ALL_LANGUAGES"> : Builtin {
diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
index b08b02d9dcecaa..3de1576139c7a7 100644
--- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
+++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "TableGenBackends.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -38,6 +39,14 @@ class PrototypeParser {
 private:
   void ParsePrototype(StringRef Prototype) {
     Prototype = Prototype.trim();
+
+    // Some builtins don't have an expressible prototype, simply emit an empty
+    // string for them.
+    if (Prototype.empty()) {
+      Type = "";
+      return;
+    }
+
     ParseTypes(Prototype);
   }
 
@@ -244,8 +253,15 @@ void PrintAttributes(const Record *Builtin, BuiltinType BT, raw_ostream &OS) {
 
   for (const auto *Attr : Builtin->getValueAsListOfDefs("Attributes")) {
     OS << Attr->getValueAsString("Mangling");
-    if (Attr->isSubClassOf("IndexedAttribute"))
+    if (Attr->isSubClassOf("IndexedAttribute")) {
       OS << ':' << Attr->getValueAsInt("Index") << ':';
+    } else if (Attr->isSubClassOf("MultiIndexAttribute")) {
+      OS << '<';
+      llvm::ListSeparator Sep(",");
+      for (int64_t Index : Attr->getValueAsListOfInts("Indices"))
+        OS << Sep << Index;
+      OS << '>';
+    }
   }
   OS << '\"';
 }
@@ -403,10 +419,6 @@ void clang::EmitClangBuiltins(const RecordKeeper &Records, raw_ostream &OS) {
     EmitBuiltin(OS, Builtin);
   }
 
-  for (const auto *Entry : Records.getAllDerivedDefinitions("CustomEntry")) {
-    OS << Entry->getValueAsString("Entry") << '\n';
-  }
-
   OS << R"c++(
 #undef ATOMIC_BUILTIN
 #undef BUILTIN

>From 506db4263e20dcee7bad3fff1e696fcb6212e21e Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Tue, 17 Dec 2024 19:21:27 +0000
Subject: [PATCH 09/10] Teach main builtin TableGen to use direct enums,
 strings, and info

This moves the main builtins and several targets to use nice generated
string tables and info structures rather than X-macros. Even without
obvious prefixes to factor out, the resulting tables are significantly
smaller and much cheaper to compile without all the X-macro overhead.

This leaves the X-macros in place for atomic builtins which have a wide
range of uses that don't seem reasonable to fold into TableGen.

As future work, these should move to their own file (whether as X-macros
or just generated patterns) so the AST headers don't have to include all
the data for other builtins.
---
 clang/include/clang/AST/Expr.h                |   2 -
 clang/include/clang/Basic/Builtins.h          |  17 +-
 clang/include/clang/Basic/IdentifierTable.h   |   3 +-
 clang/include/clang/Basic/TargetBuiltins.h    |  14 +-
 clang/lib/AST/StmtPrinter.cpp                 |   1 -
 clang/lib/Basic/Builtins.cpp                  |  26 +-
 clang/lib/Basic/Targets/BPF.cpp               |  14 +-
 clang/lib/Basic/Targets/RISCV.cpp             |  16 +-
 clang/lib/Basic/Targets/X86.cpp               |  51 ++-
 clang/lib/Sema/SemaChecking.cpp               |   1 -
 .../target-builtins-prototype-parser.td       |  16 +-
 clang/utils/TableGen/ClangBuiltinsEmitter.cpp | 342 +++++++++++-------
 12 files changed, 289 insertions(+), 214 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 708c8656decbe0..b1e969106f8c0a 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -6678,7 +6678,6 @@ class PseudoObjectExpr final
 class AtomicExpr : public Expr {
 public:
   enum AtomicOp {
-#define BUILTIN(ID, TYPE, ATTRS)
 #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) AO ## ID,
 #include "clang/Basic/Builtins.inc"
     // Avoid trailing comma
@@ -6742,7 +6741,6 @@ class AtomicExpr : public Expr {
   AtomicOp getOp() const { return Op; }
   StringRef getOpAsString() const {
     switch (Op) {
-#define BUILTIN(ID, TYPE, ATTRS)
 #define ATOMIC_BUILTIN(ID, TYPE, ATTRS)                                        \
   case AO##ID:                                                                 \
     return #ID;
diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h
index a03442e8e29194..4bbb210d7b5246 100644
--- a/clang/include/clang/Basic/Builtins.h
+++ b/clang/include/clang/Basic/Builtins.h
@@ -64,9 +64,10 @@ struct HeaderDesc {
 
 namespace Builtin {
 enum ID {
-  NotBuiltin  = 0,      // This is not a builtin function.
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+  NotBuiltin = 0, // This is not a builtin function.
+#define GET_BUILTIN_ENUMERATORS
 #include "clang/Basic/Builtins.inc"
+#undef GET_BUILTIN_ENUMERATORS
   FirstTSBuiltin
 };
 
@@ -75,14 +76,14 @@ struct Info {
   // Rather than store pointers to the string literals describing these four
   // aspects of builtins, we store offsets into a common string table.
   struct StrOffsets {
-    llvm::StringTable::Offset Name;
-    llvm::StringTable::Offset Type;
-    llvm::StringTable::Offset Attributes;
-    llvm::StringTable::Offset Features;
+    llvm::StringTable::Offset Name = 0;
+    llvm::StringTable::Offset Type = 0;
+    llvm::StringTable::Offset Attributes = 0;
+    llvm::StringTable::Offset Features = 0;
   } Offsets;
 
-  HeaderDesc Header;
-  LanguageID Langs;
+  HeaderDesc Header = HeaderDesc::NO_HEADER;
+  LanguageID Langs = ALL_LANGUAGES;
 };
 
 /// A constexpr function to construct an infos array from X-macros.
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 33d1cdb46f108b..512a2f0b842d78 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -101,8 +101,9 @@ enum class InterestingIdentifier {
   NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS,
 
   NotBuiltin,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#define GET_BUILTIN_ENUMERATORS
 #include "clang/Basic/Builtins.inc"
+#undef GET_BUILTIN_ENUMERATORS
   FirstTSBuiltin,
 
   NotInterestingIdentifier = 65534
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index f066f902a18c02..b8b1338df690a1 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -93,8 +93,9 @@ namespace clang {
   namespace BPF {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
-  #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-  #include "clang/Basic/BuiltinsBPF.inc"
+#define GET_BUILTIN_ENUMERATORS
+#include "clang/Basic/BuiltinsBPF.inc"
+#undef GET_BUILTIN_ENUMERATORS
     LastTSBuiltin
   };
   }
@@ -133,12 +134,14 @@ namespace clang {
   namespace X86 {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#define GET_BUILTIN_ENUMERATORS
 #include "clang/Basic/BuiltinsX86.inc"
+#undef GET_BUILTIN_ENUMERATORS
     FirstX86_64Builtin,
     LastX86CommonBuiltin = FirstX86_64Builtin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#define GET_BUILTIN_ENUMERATORS
 #include "clang/Basic/BuiltinsX86_64.inc"
+#undef GET_BUILTIN_ENUMERATORS
     LastTSBuiltin
   };
   }
@@ -172,8 +175,9 @@ namespace clang {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
     FirstRVVBuiltin = clang::Builtin::FirstTSBuiltin,
     LastRVVBuiltin = RISCVVector::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#define GET_BUILTIN_ENUMERATORS
 #include "clang/Basic/BuiltinsRISCV.inc"
+#undef GET_BUILTIN_ENUMERATORS
     LastTSBuiltin
   };
   } // namespace RISCV
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index c5d19f70fc6ea0..9a89a4dd0b0c84 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1928,7 +1928,6 @@ void StmtPrinter::VisitPseudoObjectExpr(PseudoObjectExpr *Node) {
 void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
   const char *Name = nullptr;
   switch (Node->getOp()) {
-#define BUILTIN(ID, TYPE, ATTRS)
 #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
   case AtomicExpr::AO ## ID: \
     Name = #ID "("; \
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 52375a3663f39e..16a83cd689db41 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -29,23 +29,19 @@ const char *HeaderDesc::getName() const {
   llvm_unreachable("Unknown HeaderDesc::HeaderID enum");
 }
 
-static constexpr llvm::StringTable BuiltinStrings =
-    CLANG_BUILTIN_STR_TABLE_START
-        // We inject a non-builtin string into the table.
-        CLANG_BUILTIN_STR_TABLE("not a builtin function", "", "")
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
+static constexpr unsigned NumBuiltins = Builtin::FirstTSBuiltin;
+
+#define GET_BUILTIN_STR_TABLE
 #include "clang/Basic/Builtins.inc"
-    ;
-static_assert(BuiltinStrings.size() < 100'000);
-
-static constexpr auto BuiltinInfos =
-    Builtin::MakeInfos<Builtin::FirstTSBuiltin>(
-        {CLANG_BUILTIN_ENTRY("not a builtin function", "", "")
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
-#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#undef GET_BUILTIN_STR_TABLE
+
+static constexpr Builtin::Info BuiltinInfos[] = {
+    Builtin::Info{}, // No-builtin info entry.
+#define GET_BUILTIN_INFOS
 #include "clang/Basic/Builtins.inc"
-        });
+#undef GET_BUILTIN_INFOS
+};
+static_assert(std::size(BuiltinInfos) == NumBuiltins);
 
 std::pair<const Builtin::InfosShard &, const Builtin::Info &>
 Builtin::Context::getShardAndInfo(unsigned ID) const {
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index b4504faa4d5eeb..a463de08840201 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -22,16 +22,16 @@ using namespace clang::targets;
 static constexpr int NumBuiltins =
     clang::BPF::LastTSBuiltin - Builtin::FirstTSBuiltin;
 
-static constexpr llvm::StringTable BuiltinStrings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
+#define GET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsBPF.inc"
-    ;
+#undef GET_BUILTIN_STR_TABLE
 
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
+static constexpr Builtin::Info BuiltinInfos[] = {
+#define GET_BUILTIN_INFOS
 #include "clang/Basic/BuiltinsBPF.inc"
-});
+#undef GET_BUILTIN_INFOS
+};
+static_assert(std::size(BuiltinInfos) == NumBuiltins);
 
 void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
                                      MacroBuilder &Builder) const {
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 0e78471d6f4d7b..079ddc3c5d6e13 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -277,18 +277,16 @@ static constexpr std::array<Builtin::Info, NumRVVSiFiveBuiltins> BuiltinInfos =
 };
 } // namespace RVVSiFive
 
-static constexpr llvm::StringTable BuiltinStrings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
+#define GET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsRISCV.inc"
-    ;
+#undef GET_BUILTIN_STR_TABLE
 
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumRISCVBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
+static constexpr Builtin::Info BuiltinInfos[] = {
+#define GET_BUILTIN_INFOS
 #include "clang/Basic/BuiltinsRISCV.inc"
-});
+#undef GET_BUILTIN_INFOS
+};
+static_assert(std::size(BuiltinInfos) == NumRISCVBuiltins);
 
 llvm::SmallVector<Builtin::InfosShard>
 RISCVTargetInfo::getTargetBuiltins() const {
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 9999487c385c50..dacadec27e1ea0 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -31,36 +31,31 @@ static constexpr int NumX86_64Builtins =
 static constexpr int NumBuiltins = X86::LastTSBuiltin - Builtin::FirstTSBuiltin;
 static_assert(NumBuiltins == (NumX86Builtins + NumX86_64Builtins));
 
-static constexpr llvm::StringTable BuiltinX86Strings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
+namespace X86 {
+#define GET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsX86.inc"
-    ;
+#undef GET_BUILTIN_STR_TABLE
 
-static constexpr llvm::StringTable BuiltinX86_64Strings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsX86_64.inc"
-    ;
-
-static constexpr auto BuiltinX86Infos = Builtin::MakeInfos<NumX86Builtins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
+static constexpr Builtin::Info BuiltinInfos[] = {
+#define GET_BUILTIN_INFOS
 #include "clang/Basic/BuiltinsX86.inc"
-});
+#undef GET_BUILTIN_INFOS
+};
+static_assert(std::size(BuiltinInfos) == NumX86Builtins);
+} // namespace X86
 
-static constexpr auto BuiltinX86_64Infos =
-    Builtin::MakeInfos<NumX86_64Builtins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#define TARGET_HEADER_BUILTIN CLANG_TARGET_HEADER_BUILTIN_ENTRY
+namespace X86_64 {
+#define GET_BUILTIN_STR_TABLE
 #include "clang/Basic/BuiltinsX86_64.inc"
-    });
+#undef GET_BUILTIN_STR_TABLE
+
+static constexpr Builtin::Info BuiltinInfos[] = {
+#define GET_BUILTIN_INFOS
+#include "clang/Basic/BuiltinsX86_64.inc"
+#undef GET_BUILTIN_INFOS
+};
+static_assert(std::size(BuiltinInfos) == NumX86_64Builtins);
+} // namespace X86_64
 
 static const char *const GCCRegNames[] = {
     "ax",    "dx",    "cx",    "bx",    "si",      "di",    "bp",    "sp",
@@ -1879,13 +1874,13 @@ ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const {
 
 llvm::SmallVector<Builtin::InfosShard>
 X86_32TargetInfo::getTargetBuiltins() const {
-  return {{&BuiltinX86Strings, BuiltinX86Infos}};
+  return {{&X86::BuiltinStrings, X86::BuiltinInfos}};
 }
 
 llvm::SmallVector<Builtin::InfosShard>
 X86_64TargetInfo::getTargetBuiltins() const {
   return {
-      {&BuiltinX86Strings, BuiltinX86Infos},
-      {&BuiltinX86_64Strings, BuiltinX86_64Infos},
+      {&X86::BuiltinStrings, X86::BuiltinInfos},
+      {&X86_64::BuiltinStrings, X86_64::BuiltinInfos},
   };
 }
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e703a62ff9cf18..203ba418fea60a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2446,7 +2446,6 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
       CheckNonNullArgument(*this, TheCall->getArg(0), TheCall->getExprLoc());
     break;
   }
-#define BUILTIN(ID, TYPE, ATTRS)
 #define ATOMIC_BUILTIN(ID, TYPE, ATTRS)                                        \
   case Builtin::BI##ID:                                                        \
     return AtomicOpsOverloaded(TheCallResult, AtomicExpr::AO##ID);
diff --git a/clang/test/TableGen/target-builtins-prototype-parser.td b/clang/test/TableGen/target-builtins-prototype-parser.td
index 555aebb3ccfb1f..cbc15c3dba6c7c 100644
--- a/clang/test/TableGen/target-builtins-prototype-parser.td
+++ b/clang/test/TableGen/target-builtins-prototype-parser.td
@@ -10,49 +10,49 @@
 include "clang/Basic/BuiltinsBase.td"
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_01, "E8idE4b", "")
+// CHECK: Builtin::Info{{.*}} __builtin_01 {{.*}} /* E8idE4b */
   let Prototype = "_ExtVector<8,int>(double, _ExtVector<4,        bool>)";
   let Spellings = ["__builtin_01"];
 }
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_02, "E8UiE4s", "")
+// CHECK: Builtin::Info{{.*}} __builtin_02 {{.*}} /* E8UiE4s */
   let Prototype = "_ExtVector<8,unsigned int>(_ExtVector<4, short>)";
   let Spellings = ["__builtin_02"];
 }
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_03, "di", "")
+// CHECK: Builtin::Info{{.*}} __builtin_03 {{.*}} /* di */
   let Prototype = "double(int)";
   let Spellings = ["__builtin_03"];
 }
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_04, "diIUi", "")
+// CHECK: Builtin::Info{{.*}} __builtin_04 {{.*}} /* diIUi */
  let Prototype = "double(int, _Constant unsigned int)";
   let Spellings = ["__builtin_04"];
 }
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_05, "v&v&", "")
+// CHECK: Builtin::Info{{.*}} __builtin_05 {{.*}} /* v&v& */
  let Prototype = "void&(void&)";
   let Spellings = ["__builtin_05"];
 }
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_06, "v*v*cC*.", "")
+// CHECK: Builtin::Info{{.*}} __builtin_06 {{.*}} /* v*v*cC*. */
  let Prototype = "void*(void*, char const*, ...)";
   let Spellings = ["__builtin_06"];
 }
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_07, "E8iE4dE4b.", "")
+// CHECK: Builtin::Info{{.*}} __builtin_07 {{.*}} /* E8iE4dE4b. */
   let Prototype = "_ExtVector<8, int>(_ExtVector<4,double>, _ExtVector<4, bool>, ...)";
   let Spellings = ["__builtin_07"];
 }
 
 def : Builtin {
-// CHECK: BUILTIN(__builtin_08, "di*R", "")
+// CHECK: Builtin::Info{{.*}} __builtin_08 {{.*}} /* di*R */
   let Prototype = "double(int * restrict)";
   let Spellings = ["__builtin_08"];
 }
diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
index 3de1576139c7a7..c49836e913deab 100644
--- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
+++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
@@ -15,7 +15,9 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringToOffsetTable.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include <sstream>
 
 using namespace llvm;
 
@@ -29,6 +31,119 @@ enum class BuiltinType {
   TargetLibBuiltin,
 };
 
+class HeaderNameParser {
+public:
+  HeaderNameParser(const Record *Builtin) {
+    for (char c : Builtin->getValueAsString("Header")) {
+      if (std::islower(c))
+        HeaderName += static_cast<char>(std::toupper(c));
+      else if (c == '.' || c == '_' || c == '/' || c == '-')
+        HeaderName += '_';
+      else
+        PrintFatalError(Builtin->getLoc(), "Unexpected header name");
+    }
+  }
+
+  void Print(raw_ostream &OS) const { OS << HeaderName; }
+
+private:
+  std::string HeaderName;
+};
+
+struct Builtin {
+  BuiltinType BT;
+  std::string Name;
+  std::string Type;
+  std::string Attributes;
+
+  const Record *BuiltinRecord;
+
+  void EmitEnumerator(llvm::raw_ostream &OS) const {
+    OS << "    BI" << Name << ",\n";
+  }
+
+  void EmitInfo(llvm::raw_ostream &OS, const StringToOffsetTable &Table) const {
+    OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
+       << Table.GetStringOffset(Name) << " /* " << Name << " */, "
+       << Table.GetStringOffset(Type) << " /* " << Type << " */, "
+       << Table.GetStringOffset(Attributes) << " /* " << Attributes << " */, ";
+    if (BT == BuiltinType::TargetBuiltin) {
+      const auto &Features = BuiltinRecord->getValueAsString("Features");
+      OS << Table.GetStringOffset(Features) << " /* " << Features << " */";
+    } else {
+      OS << "0";
+    }
+    OS << "}, ";
+    if (BT == BuiltinType::LibBuiltin || BT == BuiltinType::TargetLibBuiltin) {
+      OS << "HeaderDesc::";
+      HeaderNameParser{BuiltinRecord}.Print(OS);
+    } else {
+      OS << "HeaderDesc::NO_HEADER";
+    }
+    OS << ", ";
+    if (BT == BuiltinType::LibBuiltin || BT == BuiltinType::LangBuiltin ||
+        BT == BuiltinType::TargetLibBuiltin) {
+      OS << BuiltinRecord->getValueAsString("Languages");
+    } else {
+      OS << "ALL_LANGUAGES";
+    }
+    OS << "},\n";
+  }
+
+  void EmitXMacro(llvm::raw_ostream &OS) const {
+    if (BuiltinRecord->getValueAsBit("RequiresUndef"))
+      OS << "#undef " << Name << '\n';
+    switch (BT) {
+    case BuiltinType::LibBuiltin:
+      OS << "LIBBUILTIN";
+      break;
+    case BuiltinType::LangBuiltin:
+      OS << "LANGBUILTIN";
+      break;
+    case BuiltinType::Builtin:
+      OS << "BUILTIN";
+      break;
+    case BuiltinType::AtomicBuiltin:
+      OS << "ATOMIC_BUILTIN";
+      break;
+    case BuiltinType::TargetBuiltin:
+      OS << "TARGET_BUILTIN";
+      break;
+    case BuiltinType::TargetLibBuiltin:
+      OS << "TARGET_HEADER_BUILTIN";
+      break;
+    }
+
+    OS << "(" << Name << ", \"" << Type << "\", \"" << Attributes << "\"";
+
+    switch (BT) {
+    case BuiltinType::LibBuiltin: {
+      OS << ", ";
+      HeaderNameParser{BuiltinRecord}.Print(OS);
+      [[fallthrough]];
+    }
+    case BuiltinType::LangBuiltin: {
+      OS << ", " << BuiltinRecord->getValueAsString("Languages");
+      break;
+    }
+    case BuiltinType::TargetLibBuiltin: {
+      OS << ", ";
+      HeaderNameParser{BuiltinRecord}.Print(OS);
+      OS << ", " << BuiltinRecord->getValueAsString("Languages");
+      [[fallthrough]];
+    }
+    case BuiltinType::TargetBuiltin: {
+      OS << ", \"" << BuiltinRecord->getValueAsString("Features") << "\"";
+      break;
+    }
+    case BuiltinType::AtomicBuiltin:
+    case BuiltinType::Builtin:
+      break;
+    }
+    OS << ")\n";
+  }
+};
+
 class PrototypeParser {
 public:
   PrototypeParser(StringRef Substitution, const Record *Builtin)
@@ -36,6 +151,8 @@ class PrototypeParser {
     ParsePrototype(Builtin->getValueAsString("Prototype"));
   }
 
+  std::string takeTypeString() && { return std::move(Type); }
+
 private:
   void ParsePrototype(StringRef Prototype) {
     Prototype = Prototype.trim();
@@ -205,36 +322,14 @@ class PrototypeParser {
     }
   }
 
-public:
-  void Print(raw_ostream &OS) const { OS << ", \"" << Type << '\"'; }
-
-private:
   SMLoc Loc;
   StringRef Substitution;
   std::string Type;
 };
 
-class HeaderNameParser {
-public:
-  HeaderNameParser(const Record *Builtin) {
-    for (char c : Builtin->getValueAsString("Header")) {
-      if (std::islower(c))
-        HeaderName += static_cast<char>(std::toupper(c));
-      else if (c == '.' || c == '_' || c == '/' || c == '-')
-        HeaderName += '_';
-      else
-        PrintFatalError(Builtin->getLoc(), "Unexpected header name");
-    }
-  }
-
-  void Print(raw_ostream &OS) const { OS << HeaderName; }
-
-private:
-  std::string HeaderName;
-};
-
-void PrintAttributes(const Record *Builtin, BuiltinType BT, raw_ostream &OS) {
-  OS << '\"';
+std::string renderAttributes(const Record *Builtin, BuiltinType BT) {
+  std::string Attributes;
+  raw_string_ostream OS(Attributes);
   if (Builtin->isSubClassOf("LibBuiltin")) {
     if (BT == BuiltinType::LibBuiltin) {
       OS << 'f';
@@ -263,63 +358,18 @@ void PrintAttributes(const Record *Builtin, BuiltinType BT, raw_ostream &OS) {
       OS << '>';
     }
   }
-  OS << '\"';
+  return Attributes;
 }
 
-void EmitBuiltinDef(raw_ostream &OS, StringRef Substitution,
-                    const Record *Builtin, Twine Spelling, BuiltinType BT) {
-  if (Builtin->getValueAsBit("RequiresUndef"))
-    OS << "#undef " << Spelling << '\n';
-  switch (BT) {
-  case BuiltinType::LibBuiltin:
-    OS << "LIBBUILTIN";
-    break;
-  case BuiltinType::LangBuiltin:
-    OS << "LANGBUILTIN";
-    break;
-  case BuiltinType::Builtin:
-    OS << "BUILTIN";
-    break;
-  case BuiltinType::AtomicBuiltin:
-    OS << "ATOMIC_BUILTIN";
-    break;
-  case BuiltinType::TargetBuiltin:
-    OS << "TARGET_BUILTIN";
-    break;
-  case BuiltinType::TargetLibBuiltin:
-    OS << "TARGET_HEADER_BUILTIN";
-    break;
-  }
-
-  OS << "(" << Spelling;
-  PrototypeParser{Substitution, Builtin}.Print(OS);
-  OS << ", ";
-  PrintAttributes(Builtin, BT, OS);
-
-  switch (BT) {
-  case BuiltinType::LibBuiltin: {
-    OS << ", ";
-    HeaderNameParser{Builtin}.Print(OS);
-    [[fallthrough]];
-  }
-  case BuiltinType::LangBuiltin: {
-    OS << ", " << Builtin->getValueAsString("Languages");
-    break;
-  }
-  case BuiltinType::TargetLibBuiltin: {
-    OS << ", ";
-    HeaderNameParser{Builtin}.Print(OS);
-    OS << ", " << Builtin->getValueAsString("Languages");
-    [[fallthrough]];
-  }
-  case BuiltinType::TargetBuiltin:
-    OS << ", \"" << Builtin->getValueAsString("Features") << "\"";
-    break;
-  case BuiltinType::AtomicBuiltin:
-  case BuiltinType::Builtin:
-    break;
-  }
-  OS << ")\n";
+Builtin buildBuiltin(StringRef Substitution, const Record *BuiltinRecord,
+                     Twine Spelling, BuiltinType BT) {
+  Builtin B;
+  B.BT = BT;
+  B.Name = Spelling.str();
+  B.Type = PrototypeParser(Substitution, BuiltinRecord).takeTypeString();
+  B.Attributes = renderAttributes(BuiltinRecord, BT);
+  B.BuiltinRecord = BuiltinRecord;
+  return B;
 }
 
 struct TemplateInsts {
@@ -345,10 +395,11 @@ TemplateInsts getTemplateInsts(const Record *R) {
   return temp;
 }
 
-void EmitBuiltin(raw_ostream &OS, const Record *Builtin) {
+void collectBuiltins(const Record *BuiltinRecord,
+                     SmallVectorImpl<Builtin> &Builtins) {
   TemplateInsts Templates = {};
-  if (Builtin->isSubClassOf("Template")) {
-    Templates = getTemplateInsts(Builtin);
+  if (BuiltinRecord->isSubClassOf("Template")) {
+    Templates = getTemplateInsts(BuiltinRecord);
   } else {
     Templates.Affix.emplace_back();
     Templates.Substitution.emplace_back();
@@ -356,26 +407,28 @@ void EmitBuiltin(raw_ostream &OS, const Record *Builtin) {
 
   for (auto [Substitution, Affix] :
        zip(Templates.Substitution, Templates.Affix)) {
-    for (StringRef Spelling : Builtin->getValueAsListOfStrings("Spellings")) {
+    for (StringRef Spelling :
+         BuiltinRecord->getValueAsListOfStrings("Spellings")) {
       auto FullSpelling =
           (Templates.IsPrefix ? Affix + Spelling : Spelling + Affix).str();
       BuiltinType BT = BuiltinType::Builtin;
-      if (Builtin->isSubClassOf("AtomicBuiltin")) {
+      if (BuiltinRecord->isSubClassOf("AtomicBuiltin")) {
         BT = BuiltinType::AtomicBuiltin;
-      } else if (Builtin->isSubClassOf("LangBuiltin")) {
+      } else if (BuiltinRecord->isSubClassOf("LangBuiltin")) {
         BT = BuiltinType::LangBuiltin;
-      } else if (Builtin->isSubClassOf("TargetLibBuiltin")) {
+      } else if (BuiltinRecord->isSubClassOf("TargetLibBuiltin")) {
         BT = BuiltinType::TargetLibBuiltin;
-      } else if (Builtin->isSubClassOf("TargetBuiltin")) {
+      } else if (BuiltinRecord->isSubClassOf("TargetBuiltin")) {
         BT = BuiltinType::TargetBuiltin;
-      } else if (Builtin->isSubClassOf("LibBuiltin")) {
+      } else if (BuiltinRecord->isSubClassOf("LibBuiltin")) {
         BT = BuiltinType::LibBuiltin;
-        if (Builtin->getValueAsBit("AddBuiltinPrefixedAlias"))
-          EmitBuiltinDef(OS, Substitution, Builtin,
-                         std::string("__builtin_") + FullSpelling,
-                         BuiltinType::Builtin);
+        if (BuiltinRecord->getValueAsBit("AddBuiltinPrefixedAlias"))
+          Builtins.push_back(buildBuiltin(
+              Substitution, BuiltinRecord,
+              std::string("__builtin_") + FullSpelling, BuiltinType::Builtin));
       }
-      EmitBuiltinDef(OS, Substitution, Builtin, FullSpelling, BT);
+      Builtins.push_back(
+          buildBuiltin(Substitution, BuiltinRecord, FullSpelling, BT));
     }
   }
 }
@@ -384,47 +437,78 @@ void EmitBuiltin(raw_ostream &OS, const Record *Builtin) {
 void clang::EmitClangBuiltins(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("List of builtins that Clang recognizes", OS);
 
-  OS << R"c++(
-#if defined(BUILTIN) && !defined(LIBBUILTIN)
-#  define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-#if defined(BUILTIN) && !defined(LANGBUILTIN)
-#  define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-// Some of our atomics builtins are handled by AtomicExpr rather than
-// as normal builtin CallExprs. This macro is used for such builtins.
-#ifndef ATOMIC_BUILTIN
-#  define ATOMIC_BUILTIN(ID, TYPE, ATTRS) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
-#  define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-#if defined(BUILTIN) && !defined(TARGET_HEADER_BUILTIN)
-#  define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-)c++";
+  SmallVector<Builtin> Builtins;
+  // AtomicBuiltins are order dependent. Emit them first to make manual checking
+  // easier and so we can build a special atomic builtin X-macro.
+  for (const auto *BuiltinRecord :
+       Records.getAllDerivedDefinitions("AtomicBuiltin"))
+    collectBuiltins(BuiltinRecord, Builtins);
 
-  // AtomicBuiltins are order dependent
-  // emit them first to make manual checking easier
-  for (const auto *Builtin : Records.getAllDerivedDefinitions("AtomicBuiltin"))
-    EmitBuiltin(OS, Builtin);
+  unsigned NumAtomicBuiltins = Builtins.size();
 
-  for (const auto *Builtin : Records.getAllDerivedDefinitions("Builtin")) {
-    if (Builtin->isSubClassOf("AtomicBuiltin"))
+  for (const auto *BuiltinRecord :
+       Records.getAllDerivedDefinitions("Builtin")) {
+    if (BuiltinRecord->isSubClassOf("AtomicBuiltin"))
       continue;
-    EmitBuiltin(OS, Builtin);
+    collectBuiltins(BuiltinRecord, Builtins);
+  }
+
+  auto AtomicBuiltins = ArrayRef(Builtins).slice(0, NumAtomicBuiltins);
+
+  // Collect strings into a table.
+  StringToOffsetTable Table;
+  Table.GetOrAddStringOffset("");
+  for (const auto &B : Builtins) {
+    Table.GetOrAddStringOffset(B.Name);
+    Table.GetOrAddStringOffset(B.Type);
+    Table.GetOrAddStringOffset(B.Attributes);
+    if (B.BT == BuiltinType::TargetBuiltin)
+      Table.GetOrAddStringOffset(B.BuiltinRecord->getValueAsString("Features"));
   }
 
+  // Emit enumerators.
+  OS << R"c++(
+#ifdef GET_BUILTIN_ENUMERATORS
+)c++";
+  for (const auto &B : Builtins)
+    B.EmitEnumerator(OS);
+  OS << R"c++(
+#endif // GET_BUILTIN_ENUMERATORS
+)c++";
+
+  // Emit a string table that can be referenced for these builtins.
+  OS << R"c++(
+#ifdef GET_BUILTIN_STR_TABLE
+)c++";
+  Table.EmitStringLiteralDef(
+      OS, "static constexpr llvm::StringTable BuiltinStrings", /*Indent=*/"");
+  OS << R"c++(
+#endif // GET_BUILTIN_STR_TABLE
+)c++";
+
+  // Emit a direct set of `Builtin::Info` initializers.
+  OS << R"c++(
+#ifdef GET_BUILTIN_INFOS
+)c++";
+  for (const auto &B : Builtins)
+    B.EmitInfo(OS, Table);
+  OS << R"c++(
+#endif // GET_BUILTIN_INFOS
+)c++";
+
+  // Emit X-macros for the atomic builtins to support various custom patterns
+  // used exclusively with those builtins.
+  //
+  // FIXME: We should eventually move this to a separate file so that users
+  // don't need to include the full set of builtins.
+  OS << R"c++(
+#ifdef ATOMIC_BUILTIN
+)c++";
+  for (const auto &Builtin : AtomicBuiltins) {
+    Builtin.EmitXMacro(OS);
+  }
   OS << R"c++(
+#endif // ATOMIC_BUILTIN
 #undef ATOMIC_BUILTIN
-#undef BUILTIN
-#undef LIBBUILTIN
-#undef LANGBUILTIN
-#undef TARGET_BUILTIN
-#undef TARGET_HEADER_BUILTIN
 )c++";
 }

>From 979ae6b10db8a74ae3bff7f3f9fd34f535ce0910 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc at gmail.com>
Date: Wed, 18 Dec 2024 07:11:02 +0000
Subject: [PATCH 10/10] Switch Neon, SVE, SME, and RVV builtins to use a prefix

This avoids repeating this part of the name in every string, shrinking
the string tables. For SVE in particular, which is by far the largest
builtin string table, this gets us well under 200KiB. Others shrink by
30% - 50% depending on how long the rest of the strings end up.

Overall, this completes restructuring the builtin string tables to try
to minimize their size and hopefully avoid both toolchain bugs and
compile-time memory overheads of the full-sized string tables.
---
 clang/include/clang/Basic/Builtins.h          | 14 ++++++-
 clang/lib/Basic/Builtins.cpp                  | 39 ++++++++++++-------
 clang/lib/Basic/Targets/AArch64.cpp           |  9 +++--
 clang/lib/Basic/Targets/ARM.cpp               |  5 ++-
 clang/lib/Basic/Targets/RISCV.cpp             |  5 +--
 clang/lib/CodeGen/CGBuiltin.cpp               | 10 +++--
 clang/lib/CodeGen/CodeGenModule.cpp           |  3 +-
 clang/lib/Sema/SemaChecking.cpp               | 16 ++++----
 clang/lib/Sema/SemaExpr.cpp                   |  2 +-
 .../StaticAnalyzer/Core/CheckerContext.cpp    |  2 +-
 clang/utils/TableGen/NeonEmitter.cpp          | 10 ++---
 clang/utils/TableGen/RISCVVEmitter.cpp        | 10 ++---
 clang/utils/TableGen/SveEmitter.cpp           | 32 ++++++---------
 13 files changed, 83 insertions(+), 74 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h
index 4bbb210d7b5246..5ebe4c03fadccd 100644
--- a/clang/include/clang/Basic/Builtins.h
+++ b/clang/include/clang/Basic/Builtins.h
@@ -71,6 +71,8 @@ enum ID {
   FirstTSBuiltin
 };
 
+struct InfosShard;
+
 /// The info used to represent each builtin.
 struct Info {
   // Rather than store pointers to the string literals describing these four
@@ -84,6 +86,11 @@ struct Info {
 
   HeaderDesc Header = HeaderDesc::NO_HEADER;
   LanguageID Langs = ALL_LANGUAGES;
+
+  /// Get the name for the builtin represented by this `Info` object.
+  ///
+  /// Must be provided the `Shard` for this `Info` object.
+  std::string getName(const InfosShard &Shard) const;
 };
 
 /// A constexpr function to construct an infos array from X-macros.
@@ -121,6 +128,8 @@ static constexpr std::array<Info, N> MakeInfos(std::array<Info, N> Infos) {
 struct InfosShard {
   const llvm::StringTable *Strings;
   llvm::ArrayRef<Info> Infos;
+
+  llvm::StringLiteral NamePrefix = "";
 };
 
 // A detail macro used below to emit a string literal that, after string literal
@@ -236,9 +245,10 @@ class Context {
 
   /// Return the identifier name for the specified builtin,
   /// e.g. "__builtin_abs".
-  llvm::StringRef getName(unsigned ID) const;
+  std::string getName(unsigned ID) const;
 
-  /// Return a quoted name for the specified builtin for use in diagnostics.
+  /// Return the identifier name for the specified builtin inside single quotes
+  /// for a diagnostic, e.g. "'__builtin_abs'".
   std::string getQuotedName(unsigned ID) const;
 
   /// Get the type descriptor string for the specified builtin.
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 16a83cd689db41..e7829a461bbc53 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -71,11 +71,22 @@ Builtin::Context::getShardAndInfo(unsigned ID) const {
   llvm_unreachable("Invalid target builtin shard structure!");
 }
 
+std::string Builtin::Info::getName(const Builtin::InfosShard &Shard) const {
+  return (Twine(Shard.NamePrefix) + (*Shard.Strings)[Offsets.Name]).str();
+}
+
 /// Return the identifier name for the specified builtin,
 /// e.g. "__builtin_abs".
-llvm::StringRef Builtin::Context::getName(unsigned ID) const {
+std::string Builtin::Context::getName(unsigned ID) const {
   const auto &[Shard, I] = getShardAndInfo(ID);
-  return (*Shard.Strings)[I.Offsets.Name];
+  return I.getName(Shard);
+}
+
+std::string Builtin::Context::getQuotedName(unsigned ID) const {
+  const auto &[Shard, I] = getShardAndInfo(ID);
+  return (Twine("'") + Shard.NamePrefix + (*Shard.Strings)[I.Offsets.Name] +
+          "'")
+      .str();
 }
 
 const char *Builtin::Context::getTypeString(unsigned ID) const {
@@ -112,12 +123,14 @@ void Builtin::Context::InitializeTarget(const TargetInfo &Target,
 bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) {
   bool InStdNamespace = FuncName.consume_front("std-");
   for (const auto &Shard : {InfosShard{&BuiltinStrings, BuiltinInfos}})
-    for (const auto &I : Shard.Infos)
-      if (FuncName == (*Shard.Strings)[I.Offsets.Name] &&
-          (bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') ==
-              InStdNamespace)
-        return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') !=
-               nullptr;
+    if (llvm::StringRef FuncNameSuffix = FuncName;
+        FuncNameSuffix.consume_front(Shard.NamePrefix))
+      for (const auto &I : Shard.Infos)
+        if (FuncNameSuffix == (*Shard.Strings)[I.Offsets.Name] &&
+            (bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') ==
+                InStdNamespace)
+          return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') !=
+                 nullptr;
 
   return false;
 }
@@ -193,7 +206,7 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
       for (const auto &I : Shard.Infos) {
         // If this is a real builtin (ID != 0) and is supported, add it.
         if (ID != 0 && builtinIsSupported(*Shard.Strings, I, LangOpts))
-          Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
+          Table.get(I.getName(Shard)).setBuiltinID(ID);
         ++ID;
       }
     assert(ID == FirstTSBuiltin && "Should have added all non-target IDs!");
@@ -202,14 +215,14 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
     for (const auto &Shard : TargetShards)
       for (const auto &I : Shard.Infos) {
         if (builtinIsSupported(*Shard.Strings, I, LangOpts))
-          Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
+          Table.get(I.getName(Shard)).setBuiltinID(ID);
         ++ID;
       }
 
     // Step #3: Register target-specific builtins for AuxTarget.
     for (const auto &Shard : AuxTargetShards)
       for (const auto &I : Shard.Infos) {
-        Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
+        Table.get(I.getName(Shard)).setBuiltinID(ID);
         ++ID;
       }
   }
@@ -228,10 +241,6 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
   }
 }
 
-std::string Builtin::Context::getQuotedName(unsigned ID) const {
-  return (llvm::Twine("'") + getName(ID) + "'").str();
-}
-
 unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const {
   const char *WidthPos = ::strchr(getAttributesString(ID), 'V');
   if (!WidthPos)
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index a23cdc5734e49d..c1220ea47de1fd 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -770,11 +770,12 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
 llvm::SmallVector<Builtin::InfosShard>
 AArch64TargetInfo::getTargetBuiltins() const {
   return {
-      {&NEON::BuiltinStrings, NEON::BuiltinInfos},
-      {&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos},
-      {&SVE::BuiltinStrings, SVE::BuiltinInfos},
+      {&NEON::BuiltinStrings, NEON::BuiltinInfos, "__builtin_neon_"},
+      {&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos,
+       "__builtin_neon_"},
+      {&SVE::BuiltinStrings, SVE::BuiltinInfos, "__builtin_sve_"},
       {&BuiltinSVENeonBridgeStrings, BuiltinSVENeonBridgeInfos},
-      {&SME::BuiltinStrings, SME::BuiltinInfos},
+      {&SME::BuiltinStrings, SME::BuiltinInfos, "__builtin_sme_"},
       {&BuiltinAArch64Strings, BuiltinAArch64Infos},
   };
 }
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 92428908fe4a61..045d35f418e343 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1126,8 +1126,9 @@ static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumARMBuiltins>({
 llvm::SmallVector<Builtin::InfosShard>
 ARMTargetInfo::getTargetBuiltins() const {
   return {
-      {&NEON::BuiltinStrings, NEON::BuiltinInfos},
-      {&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos},
+      {&NEON::BuiltinStrings, NEON::BuiltinInfos, "__builtin_neon_"},
+      {&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos,
+       "__builtin_neon_"},
       {&BuiltinStrings, BuiltinInfos},
   };
 }
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 079ddc3c5d6e13..ee25598395d789 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -255,7 +255,6 @@ namespace RVV {
 #define GET_RISCVV_BUILTIN_STR_TABLE
 #include "clang/Basic/riscv_vector_builtins.inc"
 #undef GET_RISCVV_BUILTIN_STR_TABLE
-static_assert(BuiltinStrings.size() < 100'000);
 
 static constexpr std::array<Builtin::Info, NumRVVBuiltins> BuiltinInfos = {
 #define GET_RISCVV_BUILTIN_INFOS
@@ -291,8 +290,8 @@ static_assert(std::size(BuiltinInfos) == NumRISCVBuiltins);
 llvm::SmallVector<Builtin::InfosShard>
 RISCVTargetInfo::getTargetBuiltins() const {
   return {
-      {&RVV::BuiltinStrings, RVV::BuiltinInfos},
-      {&RVVSiFive::BuiltinStrings, RVVSiFive::BuiltinInfos},
+      {&RVV::BuiltinStrings, RVV::BuiltinInfos, "__builtin_rvv_"},
+      {&RVVSiFive::BuiltinStrings, RVVSiFive::BuiltinInfos, "__builtin_rvv_"},
       {&BuiltinStrings, BuiltinInfos},
   };
 }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4d4b7428abd505..42409ba5ae5be4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -263,8 +263,10 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                      unsigned BuiltinID) {
   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
 
-  // Get the name, skip over the __builtin_ prefix (if necessary).
-  StringRef Name;
+  // Get the name, skip over the __builtin_ prefix (if necessary). We may have
+  // to build this up so provide a small stack buffer to handle the vast
+  // majority of names.
+  llvm::SmallString<64> Name;
   GlobalDecl D(FD);
 
   // TODO: This list should be expanded or refactored after all GCC-compatible
@@ -6566,7 +6568,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
 
   // See if we have a target specific intrinsic.
-  StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
+  std::string Name = getContext().BuiltinInfo.getName(BuiltinID);
   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
   StringRef Prefix =
       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
@@ -21213,7 +21215,7 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
   auto &C = CGF.CGM.getContext();
   if (!(C.getLangOpts().NativeHalfType ||
         !C.getTargetInfo().useFP16ConversionIntrinsics())) {
-    CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
+    CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getQuotedName(BuiltinID) +
                                        " requires native half type support.");
     return nullptr;
   }
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c49f7631488285..51c23ce6dca5a5 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3983,7 +3983,8 @@ namespace {
       unsigned BuiltinID = FD->getBuiltinID();
       if (!BuiltinID || !BI.isLibFunction(BuiltinID))
         return false;
-      StringRef BuiltinName = BI.getName(BuiltinID);
+      std::string BuiltinNameStr = BI.getName(BuiltinID);
+      StringRef BuiltinName = BuiltinNameStr;
       if (BuiltinName.starts_with("__builtin_") &&
           Name == BuiltinName.slice(strlen("__builtin_"), StringRef::npos)) {
         return true;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 203ba418fea60a..eca6b1cbe03285 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1235,7 +1235,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
   bool IsChkVariant = false;
 
   auto GetFunctionName = [&]() {
-    StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
+    std::string FunctionNameStr =
+        getASTContext().BuiltinInfo.getName(BuiltinID);
+    llvm::StringRef FunctionName = FunctionNameStr;
     // Skim off the details of whichever builtin was called to produce a better
     // diagnostic, as it's unlikely that the user wrote the __builtin
     // explicitly.
@@ -1245,7 +1247,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     } else {
       FunctionName.consume_front("__builtin_");
     }
-    return FunctionName;
+    return FunctionName.str();
   };
 
   switch (BuiltinID) {
@@ -1289,7 +1291,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
                         unsigned SourceSize) {
       DiagID = diag::warn_fortify_scanf_overflow;
       unsigned Index = ArgIndex + DataIndex;
-      StringRef FunctionName = GetFunctionName();
+      std::string FunctionName = GetFunctionName();
       DiagRuntimeBehavior(TheCall->getArg(Index)->getBeginLoc(), TheCall,
                           PDiag(DiagID) << FunctionName << (Index + 1)
                                         << DestSize << SourceSize);
@@ -1438,7 +1440,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
       llvm::APSInt::compareValues(*SourceSize, *DestinationSize) <= 0)
     return;
 
-  StringRef FunctionName = GetFunctionName();
+  std::string FunctionName = GetFunctionName();
 
   SmallString<16> DestinationStr;
   SmallString<16> SourceStr;
@@ -4545,7 +4547,7 @@ ExprResult Sema::BuiltinAtomicOverloaded(ExprResult TheCallResult) {
   // Get the decl for the concrete builtin from this, we can tell what the
   // concrete integer type we should convert to is.
   unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex];
-  StringRef NewBuiltinName = Context.BuiltinInfo.getName(NewBuiltinID);
+  std::string NewBuiltinName = Context.BuiltinInfo.getName(NewBuiltinID);
   FunctionDecl *NewBuiltinDecl;
   if (NewBuiltinID == BuiltinID)
     NewBuiltinDecl = FDecl;
@@ -8334,7 +8336,7 @@ static void emitReplacement(Sema &S, SourceLocation Loc, SourceRange Range,
                             unsigned AbsKind, QualType ArgType) {
   bool EmitHeaderHint = true;
   const char *HeaderName = nullptr;
-  StringRef FunctionName;
+  std::string FunctionName;
   if (S.getLangOpts().CPlusPlus && !ArgType->isAnyComplexType()) {
     FunctionName = "std::abs";
     if (ArgType->isIntegralOrEnumerationType()) {
@@ -8483,7 +8485,7 @@ void Sema::CheckAbsoluteValueFunction(const CallExpr *Call,
   // Unsigned types cannot be negative.  Suggest removing the absolute value
   // function call.
   if (ArgType->isUnsignedIntegerType()) {
-    StringRef FunctionName =
+    std::string FunctionName =
         IsStdAbs ? "std::abs" : Context.BuiltinInfo.getName(AbsKind);
     Diag(Call->getExprLoc(), diag::warn_unsigned_abs) << ArgType << ParamType;
     Diag(Call->getExprLoc(), diag::note_remove_abs)
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 24f7d27c691154..56ed211be73ab0 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6687,7 +6687,7 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
 
 Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id,
                                  MultiExprArg CallArgs) {
-  StringRef Name = Context.BuiltinInfo.getName(Id);
+  std::string Name = Context.BuiltinInfo.getName(Id);
   LookupResult R(*this, &Context.Idents.get(Name), Loc,
                  Sema::LookupOrdinaryName);
   LookupName(R, TUScope, /*AllowBuiltinCreation=*/true);
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
index 96464b30c078f4..d0145293fa3e52 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -55,7 +55,7 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
   if (BId != 0) {
     if (Name.empty())
       return true;
-    StringRef BName = FD->getASTContext().BuiltinInfo.getName(BId);
+    std::string BName = FD->getASTContext().BuiltinInfo.getName(BId);
     size_t start = BName.find(Name);
     if (start != StringRef::npos) {
       // Accept exact match.
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index e371fa9e8c8a0c..8e1246f86cc885 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2068,16 +2068,12 @@ void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
   Table.GetOrAddStringOffset("");
   Table.GetOrAddStringOffset("n");
 
-  auto PrefixName = [](Intrinsic *Def) -> std::string {
-    return (llvm::Twine("__builtin_neon_") + Def->getMangledName()).str();
-  };
-
   for (auto *Def : Defs) {
     if (Def->hasBody())
       continue;
 
     if (Builtins.insert({Def->getMangledName(), Def}).second) {
-      Table.GetOrAddStringOffset(PrefixName(Def));
+      Table.GetOrAddStringOffset(Def->getMangledName());
       Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
       Table.GetOrAddStringOffset(Def->getTargetGuard());
     }
@@ -2097,8 +2093,8 @@ void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
   OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
   for (const auto &[Name, Def] : Builtins) {
     OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
-       << Table.GetStringOffset(PrefixName(Def)) << " /* " << PrefixName(Def)
-       << " */, ";
+       << Table.GetStringOffset(Def->getMangledName()) << " /* "
+       << Def->getMangledName() << " */, ";
     OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
        << Def->getBuiltinTypeStr() << " */, ";
     OS << Table.GetStringOffset("n") << " /* n */, ";
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 077854f71d236d..4a36a255c46b74 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -507,17 +507,13 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
   Table.GetOrAddStringOffset("n");
   Table.GetOrAddStringOffset("zve32x");
 
-  auto PrefixName = [](RVVIntrinsic *Def) -> std::string {
-    return ("__builtin_rvv_" + Def->getBuiltinName()).str();
-  };
-
   // Map to unique the builtin names.
   StringMap<RVVIntrinsic *> BuiltinMap;
   std::vector<RVVIntrinsic *> UniqueDefs;
   for (auto &Def : Defs) {
     auto P = BuiltinMap.insert({Def->getBuiltinName(), Def.get()});
     if (P.second) {
-      Table.GetOrAddStringOffset(PrefixName(Def.get()));
+      Table.GetOrAddStringOffset(Def->getBuiltinName());
       if (!Def->hasBuiltinAlias())
         Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
       UniqueDefs.push_back(Def.get());
@@ -553,8 +549,8 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
   OS << "#ifdef GET_RISCVV_BUILTIN_INFOS\n";
   for (RVVIntrinsic *Def : UniqueDefs) {
     OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
-       << Table.GetStringOffset(PrefixName(Def)) << " /* " << PrefixName(Def)
-       << " */, ";
+       << Table.GetStringOffset(Def->getBuiltinName()) << " /* "
+       << Def->getBuiltinName() << " */, ";
     if (Def->hasBuiltinAlias()) {
       OS << "0, ";
     } else {
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index c9cbbb73580c00..12b5c986d4e9b8 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1480,13 +1480,9 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) {
   Table.GetOrAddStringOffset("");
   Table.GetOrAddStringOffset("n");
 
-  auto PrefixName = [](Intrinsic *Def) -> std::string {
-    return (llvm::Twine("__builtin_sve_") + Def->getMangledName()).str();
-  };
-
   for (const auto &Def : Defs)
     if (Def->getClassKind() != ClassG) {
-      Table.GetOrAddStringOffset(PrefixName(Def.get()));
+      Table.GetOrAddStringOffset(Def->getMangledName());
       Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
       Table.GetOrAddStringOffset(Def->getGuard());
     }
@@ -1500,9 +1496,9 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) {
       SVEType ToV(To.BaseType, N);
       for (const ReinterpretTypeInfo &From : Reinterprets) {
         SVEType FromV(From.BaseType, N);
-        std::string Name = (Twine("__builtin_sve_reinterpret_") + To.Suffix +
-                            "_" + From.Suffix + Suffix)
-                               .str();
+        std::string Name =
+            (Twine("reinterpret_") + To.Suffix + "_" + From.Suffix + Suffix)
+                .str();
         std::string Type = ToV.builtin_str() + FromV.builtin_str();
         Table.GetOrAddStringOffset(Name);
         Table.GetOrAddStringOffset(Type);
@@ -1514,9 +1510,9 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) {
   OS << "#ifdef GET_SVE_BUILTIN_ENUMERATORS\n";
   for (const auto &Def : Defs)
     if (Def->getClassKind() != ClassG)
-      OS << "  BI" << PrefixName(Def.get()) << ",\n";
+      OS << "  BI__builtin_sve_" << Def->getMangledName() << ",\n";
   for (const auto &[Name, _] : ReinterpretBuiltins)
-    OS << "  BI" << Name << ",\n";
+    OS << "  BI__builtin_sve_" << Name << ",\n";
   OS << "#endif // GET_SVE_BUILTIN_ENUMERATORS\n\n";
 
   OS << "#ifdef GET_SVE_BUILTIN_STR_TABLE\n";
@@ -1530,8 +1526,8 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) {
     // declarations only live in the header file.
     if (Def->getClassKind() != ClassG) {
       OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
-         << Table.GetStringOffset(PrefixName(Def.get())) << " /* "
-         << PrefixName(Def.get()) << " */, ";
+         << Table.GetStringOffset(Def->getMangledName()) << " /* "
+         << Def->getMangledName() << " */, ";
       OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
          << Def->getBuiltinTypeStr() << " */, ";
       OS << Table.GetStringOffset("n") << " /* n */, ";
@@ -1732,13 +1728,9 @@ void SVEEmitter::createSMEBuiltins(raw_ostream &OS) {
   Table.GetOrAddStringOffset("");
   Table.GetOrAddStringOffset("n");
 
-  auto PrefixName = [](Intrinsic *Def) -> std::string {
-    return (llvm::Twine("__builtin_sme_") + Def->getMangledName()).str();
-  };
-
   for (const auto &Def : Defs)
     if (Def->getClassKind() != ClassG) {
-      Table.GetOrAddStringOffset(PrefixName(Def.get()));
+      Table.GetOrAddStringOffset(Def->getMangledName());
       Table.GetOrAddStringOffset(Def->getBuiltinTypeStr());
       Table.GetOrAddStringOffset(Def->getGuard());
     }
@@ -1746,7 +1738,7 @@ void SVEEmitter::createSMEBuiltins(raw_ostream &OS) {
   OS << "#ifdef GET_SME_BUILTIN_ENUMERATORS\n";
   for (const auto &Def : Defs)
     if (Def->getClassKind() != ClassG)
-      OS << "  BI" << PrefixName(Def.get()) << ",\n";
+      OS << "  BI__builtin_sme_" << Def->getMangledName() << ",\n";
   OS << "#endif // GET_SME_BUILTIN_ENUMERATORS\n\n";
 
   OS << "#ifdef GET_SME_BUILTIN_STR_TABLE\n";
@@ -1760,8 +1752,8 @@ void SVEEmitter::createSMEBuiltins(raw_ostream &OS) {
     // declarations only live in the header file.
     if (Def->getClassKind() != ClassG) {
       OS << "    Builtin::Info{Builtin::Info::StrOffsets{"
-         << Table.GetStringOffset(PrefixName(Def.get())) << " /* "
-         << PrefixName(Def.get()) << " */, ";
+         << Table.GetStringOffset(Def->getMangledName()) << " /* "
+         << Def->getMangledName() << " */, ";
       OS << Table.GetStringOffset(Def->getBuiltinTypeStr()) << " /* "
          << Def->getBuiltinTypeStr() << " */, ";
       OS << Table.GetStringOffset("n") << " /* n */, ";



More information about the cfe-commits mailing list