[clang] 2419409 - [X86][RFC] Add new option `-m[no-]evex512` to disable ZMM and 64-bit mask instructions for AVX512 features

Phoebe Wang via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 8 07:47:33 PDT 2023


Author: Phoebe Wang
Date: 2023-09-08T22:47:22+08:00
New Revision: 24194090e17b599522a080d502ab0f68125d53dd

URL: https://github.com/llvm/llvm-project/commit/24194090e17b599522a080d502ab0f68125d53dd
DIFF: https://github.com/llvm/llvm-project/commit/24194090e17b599522a080d502ab0f68125d53dd.diff

LOG: [X86][RFC] Add new option `-m[no-]evex512` to disable ZMM and 64-bit mask instructions for AVX512 features

This is an alternative of D157485 and a pre-feature to support AVX10.

AVX10 Architecture Specification: https://cdrdv2.intel.com/v1/dl/getContent/784267
AVX10 Technical Paper: https://cdrdv2.intel.com/v1/dl/getContent/784343
RFC: https://discourse.llvm.org/t/rfc-design-for-avx10-feature-support/72661

Based on the feedbacks from LLVM and GCC community, we have agreed to
start from supporting `-m[no-]evex512` on existing AVX512 features.
The option `-mno-evex512` can be used with `-mavx512xxx` to build
binaries that can run on both legacy AVX512 targets and AVX10-256.

There're still arguments about what's the expected behavior when this
option as well as `-mavx512xxx` used together with `-mavx10.1-256`. We
decided to defer the support of `-mavx10.1` after we made consensus.
Or furthermore, we start from supporting AVX10.2 and not providing any
AVX10.1 options.

Reviewed By: RKSimon, skan

Differential Revision: https://reviews.llvm.org/D159250

Added: 
    clang/test/CodeGen/X86/avx512-error.c

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/BuiltinsX86.def
    clang/include/clang/Driver/Options.td
    clang/lib/Basic/Targets/X86.cpp
    clang/lib/Basic/Targets/X86.h
    clang/lib/CodeGen/Targets/X86.cpp
    clang/lib/Headers/avx512bf16intrin.h
    clang/lib/Headers/avx512bitalgintrin.h
    clang/lib/Headers/avx512bwintrin.h
    clang/lib/Headers/avx512cdintrin.h
    clang/lib/Headers/avx512dqintrin.h
    clang/lib/Headers/avx512fintrin.h
    clang/lib/Headers/avx512fp16intrin.h
    clang/lib/Headers/avx512ifmaintrin.h
    clang/lib/Headers/avx512vbmi2intrin.h
    clang/lib/Headers/avx512vbmiintrin.h
    clang/lib/Headers/avx512vnniintrin.h
    clang/lib/Headers/avx512vp2intersectintrin.h
    clang/lib/Headers/avx512vpopcntdqintrin.h
    clang/lib/Headers/gfniintrin.h
    clang/lib/Headers/vaesintrin.h
    clang/test/CodeGen/attr-cpuspecific.c
    clang/test/CodeGen/attr-target-x86.c
    clang/test/CodeGen/regcall2.c
    clang/test/CodeGen/target-avx-abi-diag.c
    clang/test/Driver/x86-target-features.c
    clang/test/Preprocessor/x86_target_features.c
    llvm/include/llvm/TargetParser/X86TargetParser.def
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
    llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
    llvm/lib/Target/X86/X86.td
    llvm/lib/Target/X86/X86InstrInfo.td
    llvm/lib/Target/X86/X86RegisterInfo.cpp
    llvm/lib/Target/X86/X86Subtarget.cpp
    llvm/lib/Target/X86/X86Subtarget.h
    llvm/lib/TargetParser/X86TargetParser.cpp
    llvm/test/CodeGen/X86/avx512bwvl-arith.ll
    llvm/test/CodeGen/X86/avx512vl-arith.ll

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 19252f00499bc0c..2d0302c399fb6f3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -304,6 +304,9 @@ AMDGPU Support
 X86 Support
 ^^^^^^^^^^^
 
+- Added option ``-m[no-]evex512`` to disable ZMM and 64-bit mask instructions
+  for AVX512 features.
+
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
 

diff  --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 10ac3b3c34efd29..e4802f8ab1c1562 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -421,31 +421,31 @@ TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2OiV2OiIc", "ncV:128:", "aes
 
 // VAES
 TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
 TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
 TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
 TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512,vaes")
 
 // GFNI
 TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
 TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,evex512,gfni")
 TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
 TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512f,evex512,gfni")
 TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "ncV:128:", "gfni")
 TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512f,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512f,evex512,gfni")
 
 // CLMUL
 TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2OiV2OiV2OiIc", "ncV:128:", "pclmul")
 
 // VPCLMULQDQ
 TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4OiV4OiV4OiIc", "ncV:256:", "vpclmulqdq")
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8OiV8OiV8OiIc", "ncV:512:", "avx512f,vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8OiV8OiV8OiIc", "ncV:512:", "avx512f,evex512,vpclmulqdq")
 
 // AVX
 TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "ncV:256:", "avx")
@@ -732,22 +732,22 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4")
 
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
 
 // XOP
 TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "ncV:128:", "xop")
@@ -832,93 +832,93 @@ TARGET_BUILTIN(__builtin_ia32_rdpkru, "Ui", "n", "pku")
 TARGET_BUILTIN(__builtin_ia32_wrpkru, "vUi", "n", "pku")
 
 // AVX-512
-TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
 TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
 TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_cmpps512_mask,   "UsV16fV16fIiUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpps512_mask,   "UsV16fV16fIiUsIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_cmpps256_mask,   "UcV8fV8fIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpps128_mask,   "UcV4fV4fIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "ncV:128:", "avx512vl")
 
-TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8OiOiC*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vOi*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_minps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_minpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_maxps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_maxpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8OiOiC*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vOi*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_alignq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_alignq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_alignq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f,evex512")
 
 // AVX-VNNI and AVX512-VNNI
 TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
 TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
 TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
 TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
 TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni,evex512")
 
 // AVX-VNNI-INT8
 TARGET_BUILTIN(__builtin_ia32_vpdpbssd128, "V4iV4iV4iV4i", "ncV:128:", "avxvnniint8")
@@ -950,36 +950,36 @@ TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4fvC*V4iUcIi", "nV:128:", "avx
 TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4ivC*V4iUcIi", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8fvC*V8iUcIi", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8ivC*V8iUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dvC*V8iUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fvC*V16iUsIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dvC*V8OiUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8fvC*V8OiUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8OiV8OivC*V8iUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16ivC*V16iUsIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8OiV8OivC*V8OiUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8ivC*V8OiUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vv*UcV8iV8dIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vv*UsV16iV16fIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8df,  "vv*UcV8OiV8dIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vv*UcV8OiV8fIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8di,  "vv*UcV8iV8OiIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vv*UcV8OiV8OiIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_gatherpfdpd,  "vUcV8ivC*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfdps,  "vUsV16ivC*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dvC*V8iUcIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fvC*V16iUsIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dvC*V8OiUcIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8fvC*V8OiUcIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8OiV8OivC*V8iUcIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16ivC*V16iUsIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8OiV8OivC*V8OiUcIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8ivC*V8OiUcIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vv*UcV8iV8dIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vv*UsV16iV16fIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8df,  "vv*UcV8OiV8dIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vv*UcV8OiV8fIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scattersiv8di,  "vv*UcV8iV8OiIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512")
+
+TARGET_BUILTIN(__builtin_ia32_gatherpfdpd,  "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfdps,  "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
@@ -989,10 +989,10 @@ TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx51
 TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4OiV4OiIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2OiV2OiIiUc", "ncV:128:", "avx512vl")
@@ -1001,61 +1001,61 @@ TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx5
 TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4OiV4OiIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw,evex512")
+
+TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128, "V2OiV2Oi", "ncV:128:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256, "V4OiV4Oi", "ncV:256:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512, "V8OiV8Oi", "ncV:512:", "avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512, "V16iV16i", "ncV:512:", "avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8OiV8Oi", "ncV:512:", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512, "V8OiV8Oi", "ncV:512:", "avx512cd,evex512")
+TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512, "V16iV16i", "ncV:512:", "avx512cd,evex512")
+TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd,evex512")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8OiV8Oi", "ncV:512:", "avx512cd,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "ncV:128:", "avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2OiV2Oi", "ncV:128:", "avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "ncV:256:", "avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4OiV4Oi", "ncV:256:", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8OiV8Oi", "ncV:512:", "avx512vpopcntdq")
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq,evex512")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8OiV8Oi", "ncV:512:", "avx512vpopcntdq,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "ncV:128:", "avx512vl,avx512bitalg")
 TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "ncV:128:", "avx512vl,avx512bitalg")
 TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "ncV:256:", "avx512vl,avx512bitalg")
 TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "ncV:256:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "ncV:512:", "avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "ncV:512:", "avx512bitalg,evex512")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bitalg,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg")
 TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "UOiV64cV64cUOi", "ncV:512:", "avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "UOiV64cV64cUOi", "ncV:512:", "avx512bitalg,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "ncV:512:", "avx512bw,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
@@ -1169,66 +1169,66 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vv*UcV8iV8iIi", "nV:256:", "avx512
 
 TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2OiV2d", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4OiV4d", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8OiV8d", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8OiV8d", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "ncV:512:", "avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "ncV:512:", "avx512vbmi,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "ncV:512:", "avx512bw,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpshldvd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshldvq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshldvw128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvw256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpshrdvd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
@@ -1264,32 +1264,32 @@ TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "ncV:256:", "av
 TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prolq512, "V8OiV8OiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_prolq512, "V8OiV8OiIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_prold128, "V4iV4iIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prold256, "V8iV8iIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prolq128, "V2OiV2OiIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prolq256, "V4OiV4OiIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prorq512, "V8OiV8OiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_prorq512, "V8OiV8OiIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_prolvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prolvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
@@ -1298,65 +1298,65 @@ TARGET_BUILTIN(__builtin_ia32_prord128, "V4iV4iIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prord256, "V8iV8iIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorq128, "V2OiV2OiIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorq256, "V4OiV4OiIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_prorvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8OiV8Oii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8OiV8Oii", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8OiV8Oii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8OiV8Oii", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psravq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psravq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4iC*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8iC*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2OiV2OiC*V2OiUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4OiV4OiC*V4OiUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma,evex512")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512ifma,avx512vl|avxifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512ifma,avx512vl|avxifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
 TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2OiIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2OiIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f")
@@ -1391,8 +1391,8 @@ TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8fC*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "nV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512bw")
@@ -1427,38 +1427,38 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8OiV8Oii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8OiV8Oii", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_psraq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psraq256, "V4OiV4OiV2Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2OiV2Oii", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4OiV4Oii", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psllq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psraq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl")
@@ -1467,12 +1467,12 @@ TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2OiV2OiV2OiV2OiIiUc", "ncV:12
 TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2OiV2OiV2OiV2OiIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4OiV4OiV4OiV4OiIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4OiV4OiV4OiV4OiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
@@ -1483,13 +1483,13 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx
 TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "UOiV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cUOi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8OiUc", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8Oi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "UOiV64c", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cUOi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8OiUc", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8Oi", "ncV:512:", "avx512dq,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "ncV:128:", "avx512bw,avx512vl")
@@ -1504,17 +1504,17 @@ TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2OiUc", "ncV:128:", "avx512dq,avx5
 TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4OiUc", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2Oi", "ncV:128:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4Oi", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
@@ -1537,17 +1537,17 @@ TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
@@ -1570,17 +1570,17 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx
 TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
@@ -1602,36 +1602,36 @@ TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx51
 TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2OiV8OiIiV2OiUc", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4OiV8OiIiV4OiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2OiV8OiIiV2OiUc", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4OiV8OiIiV4OiUc", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2OiV4OiIiV2OiUc", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8OiV8OiV2OiIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8OiV8OiV4OiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8OiV8OiV2OiIi", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8OiV8OiV4OiIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4OiV4OiV2OiIi", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
@@ -1640,14 +1640,14 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "ncV:128:", "
 TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permdi512, "V8OiV8OiIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_permdi512, "V8OiV8OiIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "ncV:512:", "avx512vbmi,evex512")
 TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
@@ -1658,138 +1658,138 @@ TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "ncV:128:", "avx51
 TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "ncV:128:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx512dq,evex512")
+TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq,evex512")
 TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8OiV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8OiV64cV64c", "ncV:512:", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "nV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "nV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512vbmi2,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "nV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi,evex512")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
 
 // bf16 intrinsics
 TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8yV4fV4f", "ncV:128:", "avx512bf16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16yV8fV8f", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32yV16fV16f", "ncV:512:", "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32yV16fV16f", "ncV:512:", "avx512bf16,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8yV4fV8yUc", "ncV:128:", "avx512bf16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256_mask, "V8yV8fV8yUc", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512_mask, "V16yV16fV16yUs", "ncV:512:", "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512_mask, "V16yV16fV16yUs", "ncV:512:", "avx512bf16,evex512")
 TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV8yV8y", "ncV:128:", "avx512bf16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV16yV16y", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV32yV32y", "ncV:512:", "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV32yV32y", "ncV:512:", "avx512bf16,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvtsbf162ss_32, "fy", "nc", "avx512bf16")
 
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_512, "vV8OiV8OiUc*Uc*", "nV:512:", "avx512vp2intersect")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_512, "vV8OiV8OiUc*Uc*", "nV:512:", "avx512vp2intersect,evex512")
 TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_256, "vV4OiV4OiUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_128, "vV2OiV2OiUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_512, "vV16iV16iUs*Us*", "nV:512:", "avx512vp2intersect")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_512, "vV16iV16iUs*Us*", "nV:512:", "avx512vp2intersect,evex512")
 TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
 
 // AVX512 fp16 intrinsics
 TARGET_BUILTIN(__builtin_ia32_vcomish,       "iV8xV8xIiIi",    "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_addph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_subph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_mulph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_divph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_maxph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_minph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_addph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_subph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_mulph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_divph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_maxph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_minph512,      "V32xV32xV32xIi", "ncV:512:", "avx512fp16,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_minph256,      "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_minph128,      "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
@@ -1802,7 +1802,7 @@ TARGET_BUILTIN(__builtin_ia32_mulsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:",
 TARGET_BUILTIN(__builtin_ia32_subsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_maxsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_minsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_cmpph512_mask, "UiV32xV32xIiUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_cmpph512_mask, "UiV32xV32xIiUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16")
@@ -1811,28 +1811,28 @@ TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512f
 
 TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rcpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcpph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_rcpph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_rsqrtph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrtph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrtph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_rsqrtph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_getmantph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getmantph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_getmantph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_getexpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexpph512_mask, "V32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_getexpph512_mask, "V32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_scalefph128_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scalefph256_mask, "V16xV16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_scalefph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_rndscaleph_128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscaleph_256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscaleph_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_rndscaleph_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_reduceph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_reduceph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduceph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduceph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_rcpsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_rsqrtsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_getmantsh_round_mask, "V8xV8xV8xIiV8xUcIi", "ncV:128:", "avx512fp16")
@@ -1843,77 +1843,77 @@ TARGET_BUILTIN(__builtin_ia32_reducesh_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "
 
 TARGET_BUILTIN(__builtin_ia32_sqrtph, "V8xV8x", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_sqrtph256, "V16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_sqrtph512, "V32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_sqrtph512, "V32xV32xIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_sqrtsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_fpclassph128_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclassph256_mask, "UsV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassph512_mask, "UiV32xIiUi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_fpclassph512_mask, "UiV32xIiUi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_fpclasssh_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16")
 
 TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph128_mask, "V8xV2dV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_mask, "V8xV4dV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtw2ph128_mask, "V8xV8sV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_mask, "V16xV16sV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph128_mask, "V8xV8UsV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_mask, "V16xV16UsV16xUs", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph128_mask, "V8xV4iV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_mask, "V8xV8iV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph128_mask, "V8xV4UiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_mask, "V8xV8UiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph128_mask, "V8xV2OiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_mask, "V8xV4OiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph128_mask, "V8xV2UOiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_mask, "V8xV4UOiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvtusi2sh, "V8xV8xUiIi", "ncV:128:", "avx512fp16")
@@ -1923,24 +1923,24 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16"
 
 TARGET_BUILTIN(__builtin_ia32_vcvtph2psx128_mask, "V4fV8xV4fUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_mask, "V8fV8xV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vfmaddph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfmaddph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask,  "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask,  "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16,evex512")
 
 TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_maskz, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
@@ -1951,16 +1951,16 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:",
 TARGET_BUILTIN(__builtin_ia32_vfmaddcph128_maskz,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_maskz,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_maskz,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask3,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_maskz,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask3,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_maskz,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_maskz,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_maskz,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask3,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_maskz,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask3,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_mask,   "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_maskz,   "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
@@ -1974,60 +1974,60 @@ TARGET_BUILTIN(__builtin_ia32_vfmulcsh_mask,   "V4fV4fV4fV4fUcIi", "ncV:128:", "
 TARGET_BUILTIN(__builtin_ia32_vfcmulcsh_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vfmulcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfmulcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfmulcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmulcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_vfcmulcph128_mask,  "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vfcmulcph256_mask,  "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vfcmulcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmulcph512_mask,  "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")
 
 // generic select intrinsics
 TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cUOiV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cUOiV64cV64c", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectph_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectph_256, "V16xUsV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectph_512, "V32xUiV32xV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_selectph_512, "V32xUiV32xV32x", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectpbf_128, "V8yUcV8yV8y", "ncV:128:", "avx512bf16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectpbf_256, "V16yUsV16yV16y", "ncV:256:", "avx512bf16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpbf_512, "V32yUiV32yV32y", "ncV:512:", "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_selectpbf_512, "V32yUiV32yV32y", "ncV:512:", "avx512bf16,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2OiUcV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4OiUcV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8OiUcV8OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8OiUcV8OiV8Oi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_selectsh_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_selectsbf_128, "V8yUcV8yV8y", "ncV:128:", "avx512bf16")
 TARGET_BUILTIN(__builtin_ia32_selectss_128, "V4fUcV4fV4f", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_selectsd_128, "V2dUcV2dV2d", "ncV:128:", "avx512f")
 
 // generic reduction intrinsics
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph512, "xV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph512, "xV32x", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph512, "xV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph512, "xV32x", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph512, "xxV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph512, "xxV32x", "ncV:512:", "avx512fp16,evex512")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
 

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 4bc292c1fa1620c..635c40f1a9278ed 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5755,6 +5755,8 @@ def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
 def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
 def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>;
 def mno_enqcmd : Flag<["-"], "mno-enqcmd">, Group<m_x86_Features_Group>;
+def mevex512 : Flag<["-"], "mevex512">, Group<m_x86_Features_Group>;
+def mno_evex512 : Flag<["-"], "mno-evex512">, Group<m_x86_Features_Group>;
 def mf16c : Flag<["-"], "mf16c">, Group<m_x86_Features_Group>;
 def mno_f16c : Flag<["-"], "mno-f16c">, Group<m_x86_Features_Group>;
 def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;

diff  --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 9c464a3afdf7679..022d5753135e160 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -119,6 +119,8 @@ bool X86TargetInfo::initFeatureMap(
     setFeatureEnabled(Features, F, true);
 
   std::vector<std::string> UpdatedFeaturesVec;
+  bool HasEVEX512 = true;
+  bool HasAVX512F = false;
   for (const auto &Feature : FeaturesVec) {
     // Expand general-regs-only to -x86, -mmx and -sse
     if (Feature == "+general-regs-only") {
@@ -128,8 +130,17 @@ bool X86TargetInfo::initFeatureMap(
       continue;
     }
 
+    if (!HasAVX512F && Feature.substr(0, 7) == "+avx512")
+      HasAVX512F = true;
+    if (HasAVX512F && Feature == "-avx512f")
+      HasAVX512F = false;
+    if (HasEVEX512 && Feature == "-evex512")
+      HasEVEX512 = false;
+
     UpdatedFeaturesVec.push_back(Feature);
   }
+  if (HasAVX512F && HasEVEX512)
+    UpdatedFeaturesVec.push_back("+evex512");
 
   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec))
     return false;
@@ -228,6 +239,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasF16C = true;
     } else if (Feature == "+gfni") {
       HasGFNI = true;
+    } else if (Feature == "+evex512") {
+      HasEVEX512 = true;
     } else if (Feature == "+avx512cd") {
       HasAVX512CD = true;
     } else if (Feature == "+avx512vpopcntdq") {
@@ -731,6 +744,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasGFNI)
     Builder.defineMacro("__GFNI__");
 
+  if (HasEVEX512)
+    Builder.defineMacro("__EVEX512__");
   if (HasAVX512CD)
     Builder.defineMacro("__AVX512CD__");
   if (HasAVX512VPOPCNTDQ)
@@ -986,6 +1001,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("crc32", true)
       .Case("cx16", true)
       .Case("enqcmd", true)
+      .Case("evex512", true)
       .Case("f16c", true)
       .Case("fma", true)
       .Case("fma4", true)
@@ -1093,6 +1109,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("cx8", HasCX8)
       .Case("cx16", HasCX16)
       .Case("enqcmd", HasENQCMD)
+      .Case("evex512", HasEVEX512)
       .Case("f16c", HasF16C)
       .Case("fma", HasFMA)
       .Case("fma4", XOPLevel >= FMA4)
@@ -1533,8 +1550,9 @@ bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap,
       return Size <= 64;
     case 'z':
       // XMM0/YMM/ZMM0
-      if (hasFeatureEnabled(FeatureMap, "avx512f"))
-        // ZMM0 can be used if target supports AVX512F.
+      if (hasFeatureEnabled(FeatureMap, "avx512f") &&
+          hasFeatureEnabled(FeatureMap, "evex512"))
+        // ZMM0 can be used if target supports AVX512F and EVEX512 is set.
         return Size <= 512U;
       else if (hasFeatureEnabled(FeatureMap, "avx"))
         // YMM0 can be used if target supports AVX.
@@ -1553,8 +1571,10 @@ bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap,
     break;
   case 'v':
   case 'x':
-    if (hasFeatureEnabled(FeatureMap, "avx512f"))
-      // 512-bit zmm registers can be used if target supports AVX512F.
+    if (hasFeatureEnabled(FeatureMap, "avx512f") &&
+        hasFeatureEnabled(FeatureMap, "evex512"))
+      // 512-bit zmm registers can be used if target supports AVX512F and
+      // EVEX512 is set.
       return Size <= 512U;
     else if (hasFeatureEnabled(FeatureMap, "avx"))
       // 256-bit ymm registers can be used if target supports AVX.

diff  --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 039c05893d2692e..b759c76fc95ca0c 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -95,6 +95,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasLWP = false;
   bool HasFMA = false;
   bool HasF16C = false;
+  bool HasEVEX512 = false;
   bool HasAVX512CD = false;
   bool HasAVX512VPOPCNTDQ = false;
   bool HasAVX512VNNI = false;

diff  --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 9f5c3258d65cb47..2ea82397f11909e 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -1508,6 +1508,24 @@ static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
   return false;
 }
 
+static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
+                                    SourceLocation CallLoc,
+                                    const llvm::StringMap<bool> &CallerMap,
+                                    const llvm::StringMap<bool> &CalleeMap,
+                                    QualType Ty, bool IsArgument) {
+  bool Caller256 = CallerMap.lookup("avx512f") && !CallerMap.lookup("evex512");
+  bool Callee256 = CalleeMap.lookup("avx512f") && !CalleeMap.lookup("evex512");
+
+  // Forbid 512-bit or larger vector pass or return when we disabled ZMM
+  // instructions.
+  if (Caller256 || Callee256)
+    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
+           << IsArgument << Ty << "evex512";
+
+  return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
+                              "avx512f", IsArgument);
+}
+
 static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
                           SourceLocation CallLoc,
                           const llvm::StringMap<bool> &CallerMap,
@@ -1515,8 +1533,8 @@ static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
                           bool IsArgument) {
   uint64_t Size = Ctx.getTypeSize(Ty);
   if (Size > 256)
-    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
-                                "avx512f", IsArgument);
+    return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
+                                   IsArgument);
 
   if (Size > 128)
     return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",

diff  --git a/clang/lib/Headers/avx512bf16intrin.h b/clang/lib/Headers/avx512bf16intrin.h
index a864c1e3350b164..ce1dd2ee5bdfe0e 100644
--- a/clang/lib/Headers/avx512bf16intrin.h
+++ b/clang/lib/Headers/avx512bf16intrin.h
@@ -20,7 +20,7 @@ typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
 typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
 
 #define __DEFAULT_FN_ATTRS512 \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \
                  __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))

diff  --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index d4411d156ba51e6..bad265ceb7db23f 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -15,7 +15,10 @@
 #define __AVX512BITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512bitalg,evex512"),                           \
+                 __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_popcnt_epi16(__m512i __A)

diff  --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index aaeb9364801c6c4..df3c7294fba7a08 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -18,7 +18,8 @@ typedef unsigned int __mmask32;
 typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS64 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512")))
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
 
 static __inline __mmask32 __DEFAULT_FN_ATTRS
@@ -27,7 +28,7 @@ _knot_mask32(__mmask32 __M)
   return __builtin_ia32_knotsi(__M);
 }
 
-static __inline __mmask64 __DEFAULT_FN_ATTRS
+static __inline __mmask64 __DEFAULT_FN_ATTRS64
 _knot_mask64(__mmask64 __M)
 {
   return __builtin_ia32_knotdi(__M);
@@ -39,7 +40,7 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _kand_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
@@ -51,7 +52,7 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _kandn_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
@@ -63,7 +64,7 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _kor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
@@ -75,7 +76,7 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _kxnor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
@@ -87,7 +88,7 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _kxor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
@@ -111,19 +112,19 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS64
 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS64
 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS64
 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
@@ -147,19 +148,19 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS64
 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS64
 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS64
 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
@@ -171,7 +172,7 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _kadd_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
@@ -194,7 +195,7 @@ _cvtmask32_u32(__mmask32 __A) {
   return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS64
 _cvtmask64_u64(__mmask64 __A) {
   return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
 }
@@ -204,7 +205,7 @@ _cvtu32_mask32(unsigned int __A) {
   return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _cvtu64_mask64(unsigned long long __A) {
   return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
 }
@@ -214,7 +215,7 @@ _load_mask32(__mmask32 *__A) {
   return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _load_mask64(__mmask64 *__A) {
   return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
 }
@@ -224,7 +225,7 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
   *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS64
 _store_mask64(__mmask64 *__A, __mmask64 __B) {
   *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
 }
@@ -1714,7 +1715,7 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
                                               (__v64qi) _mm512_setzero_si512());
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,

diff  --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h
index bfdba84aa28b577..33b552f6fe6ad8e 100644
--- a/clang/lib/Headers/avx512cdintrin.h
+++ b/clang/lib/Headers/avx512cdintrin.h
@@ -15,7 +15,9 @@
 #define __AVX512CDINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512cd,evex512"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_conflict_epi64 (__m512i __A)

diff  --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 3ba0a0cfd5fdfb3..225d3eaf57faea4 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -15,7 +15,7 @@
 #define __AVX512DQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
 
 static __inline __mmask8 __DEFAULT_FN_ATTRS

diff  --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 88a8cebbee30120..5823728f22252b2 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -167,7 +167,7 @@ typedef enum
 } _MM_MANTISSA_SIGN_ENUM;
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
 

diff  --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index d326586578bb36d..a9428c6feba2e91 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -22,8 +22,8 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"),     \
-                 __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512fp16,evex512"), __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"),     \
                  __min_vector_width__(256)))

diff  --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h
index 5f7da52f1f73ac6..9468d17556e7214 100644
--- a/clang/lib/Headers/avx512ifmaintrin.h
+++ b/clang/lib/Headers/avx512ifmaintrin.h
@@ -15,7 +15,9 @@
 #define __IFMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512ifma,evex512"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)

diff  --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h
index 17fa77722c64f6f..11598c888787c7a 100644
--- a/clang/lib/Headers/avx512vbmi2intrin.h
+++ b/clang/lib/Headers/avx512vbmi2intrin.h
@@ -15,7 +15,7 @@
 #define __AVX512VBMI2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512)))
 
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS

diff  --git a/clang/lib/Headers/avx512vbmiintrin.h b/clang/lib/Headers/avx512vbmiintrin.h
index c0e0f94d48d489d..e47cd5caddaad5b 100644
--- a/clang/lib/Headers/avx512vbmiintrin.h
+++ b/clang/lib/Headers/avx512vbmiintrin.h
@@ -15,8 +15,9 @@
 #define __VBMIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
-
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vbmi,evex512"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)

diff  --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h
index 9935a119aacab2f..0fb381a12f2fd78 100644
--- a/clang/lib/Headers/avx512vnniintrin.h
+++ b/clang/lib/Headers/avx512vnniintrin.h
@@ -15,8 +15,9 @@
 #define __AVX512VNNIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
-
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vnni,evex512"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)

diff  --git a/clang/lib/Headers/avx512vp2intersectintrin.h b/clang/lib/Headers/avx512vp2intersectintrin.h
index 5d3cb48cfd20f28..16552cae3b4fb90 100644
--- a/clang/lib/Headers/avx512vp2intersectintrin.h
+++ b/clang/lib/Headers/avx512vp2intersectintrin.h
@@ -28,8 +28,9 @@
 #ifndef _AVX512VP2INTERSECT_H
 #define _AVX512VP2INTERSECT_H
 
-#define __DEFAULT_FN_ATTRS \
-  __attribute__((__always_inline__, __nodebug__,  __target__("avx512vp2intersect"), \
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vp2intersect,evex512"),                     \
                  __min_vector_width__(512)))
 
 /// Store, in an even/odd pair of mask registers, the indicators of the

diff  --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h
index bb435e623330292..e73e7e4f713137f 100644
--- a/clang/lib/Headers/avx512vpopcntdqintrin.h
+++ b/clang/lib/Headers/avx512vpopcntdqintrin.h
@@ -17,7 +17,9 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vpopcntdq,evex512"),                        \
+                 __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
   return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);

diff  --git a/clang/lib/Headers/gfniintrin.h b/clang/lib/Headers/gfniintrin.h
index 5ec53c54fc4ec2f..20491f041ff1d4f 100644
--- a/clang/lib/Headers/gfniintrin.h
+++ b/clang/lib/Headers/gfniintrin.h
@@ -21,9 +21,15 @@
 #define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
 
 /* Default attributes for ZMM unmasked forms. */
-#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS_Z                                                   \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512f,evex512,gfni"),                           \
+                 __min_vector_width__(512)))
 /* Default attributes for ZMM masked forms. */
-#define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS_Z_MASK                                              \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512bw,evex512,gfni"),                          \
+                 __min_vector_width__(512)))
 
 /* Default attributes for VLX masked forms. */
 #define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))

diff  --git a/clang/lib/Headers/vaesintrin.h b/clang/lib/Headers/vaesintrin.h
index 294dcff2addd71b..d7c162f5c0b16f5 100644
--- a/clang/lib/Headers/vaesintrin.h
+++ b/clang/lib/Headers/vaesintrin.h
@@ -18,8 +18,10 @@
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256)))
 
 /* Default attributes for ZMM forms. */
-#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
-
+#define __DEFAULT_FN_ATTRS_F                                                   \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512f,evex512,vaes"),                           \
+                 __min_vector_width__(512)))
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
  _mm256_aesenc_epi128(__m256i __A, __m256i __B)

diff  --git a/clang/test/CodeGen/X86/avx512-error.c b/clang/test/CodeGen/X86/avx512-error.c
new file mode 100644
index 000000000000000..1642527f292828e
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512-error.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -verify -DFEATURE_TEST=1
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -verify -DFEATURE_TEST=2
+
+#include <immintrin.h>
+
+#if FEATURE_TEST == 1
+__attribute__((target("avx512bw,evex512")))
+__m512d zmm_verify_ok(__m512d a) {
+  // No error emitted if we have "evex512" feature.
+  return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m512d zmm_error(__m512d a) {
+  // CHECK-LABEL: @test_mm512_sqrt_pd
+  return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-error {{'__builtin_ia32_sqrtpd512' needs target feature avx512f,evex512}}
+}
+#endif
+
+#if FEATURE_TEST == 2
+__attribute__((target("avx512bw,evex512")))
+__mmask64 k64_verify_ok(__mmask64 a) {
+  // No error emitted if we have "evex512" feature.
+  return _knot_mask64(a);
+}
+
+__mmask64 test_knot_mask64(__mmask64 a) {
+  return _knot_mask64(a); // expected-error {{always_inline function '_knot_mask64' requires target feature 'evex512', but would be inlined into function 'test_knot_mask64' that is compiled without support for 'evex512'}}
+}
+#endif

diff  --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c
index b7150b2947f3bbb..9150597e8c5a847 100644
--- a/clang/test/CodeGen/attr-cpuspecific.c
+++ b/clang/test/CodeGen/attr-cpuspecific.c
@@ -353,7 +353,7 @@ void OrderDispatchUsageSpecific(void) {}
 
 // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK-SAME: "tune-cpu"="ivybridge"
-// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK-SAME: "tune-cpu"="knl"
 // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+movbe,+sahf,+sse,+sse2,+sse3,+ssse3,+x87"
 // CHECK-SAME: "tune-cpu"="atom"

diff  --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index d2f09b67c7c3e45..d261c007a0a2a1f 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -69,4 +69,4 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {}
 // CHECK: "target-cpu"="x86-64-v3"
 // CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
 // CHECK: "target-cpu"="x86-64-v4"
-// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"

diff  --git a/clang/test/CodeGen/regcall2.c b/clang/test/CodeGen/regcall2.c
index ddc90c38dc4951b..96bc6615012af34 100644
--- a/clang/test/CodeGen/regcall2.c
+++ b/clang/test/CodeGen/regcall2.c
@@ -21,8 +21,8 @@ double __regcall bar(__sVector a) {
 // FIXME: Do we need to change for Windows?
 // Win: define dso_local x86_regcallcc void @__regcall3__foo(ptr noalias sret(%struct.__sVector) align 64 %agg.result, i32 noundef %a) #0
 // Win: define dso_local x86_regcallcc double @__regcall3__bar(ptr noundef %a) #0
-// Win: attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
+// Win: attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
 
 // Lin: define dso_local x86_regcallcc %struct.__sVector @__regcall3__foo(i32 noundef %a) #0
 // Lin: define dso_local x86_regcallcc double @__regcall3__bar([4 x <8 x double>] %a.coerce0, [4 x <16 x float>] %a.coerce1) #0
-// Lin: attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
+// Lin: attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }

diff  --git a/clang/test/CodeGen/target-avx-abi-diag.c b/clang/test/CodeGen/target-avx-abi-diag.c
index f3d4462a552d22a..34995dc47154592 100644
--- a/clang/test/CodeGen/target-avx-abi-diag.c
+++ b/clang/test/CodeGen/target-avx-abi-diag.c
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -verify=no256,no512 -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx -verify=no512 -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -verify=both -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature +evex512 -verify=both -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=1 -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=2 -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=3 -o - -S
 // REQUIRES: x86-registered-target
 
 // both-no-diagnostics
@@ -10,42 +14,58 @@ typedef short avx256Type __attribute__((vector_size(32)));
 
 __attribute__((target("avx"))) void takesAvx256(avx256Type t);
 __attribute__((target("avx512f"))) void takesAvx512(avx512fType t);
+__attribute__((target("avx512f,evex512"))) void takesAvx512_2(avx512fType t);
 void takesAvx256_no_target(avx256Type t);
 void takesAvx512_no_target(avx512fType t);
 
 void variadic(int i, ...);
 __attribute__((target("avx512f"))) void variadic_err(int i, ...);
 
+#if !defined(AVX512_ERR) || AVX512_ERR == 1
 // If neither side has an attribute, warn.
 void call_warn(void) {
   avx256Type t1;
   takesAvx256_no_target(t1); // no256-warning {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
 
   avx512fType t2;
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   takesAvx512_no_target(t2); // no512-warning {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 
   variadic(1, t1); // no256-warning {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   variadic(3, t2); // no512-warning {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 }
+#endif
 
+#if !defined(AVX512_ERR) || AVX512_ERR == 2
 // If only 1 side has an attribute, error.
 void call_errors(void) {
   avx256Type t1;
   takesAvx256(t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
   avx512fType t2;
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   takesAvx512(t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 
   variadic_err(1, t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   variadic_err(3, t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 }
+#endif
 
-// These two don't diagnose anything, since these are valid calls.
+#if !defined(AVX512_ERR) || AVX512_ERR == 3
 __attribute__((target("avx"))) void call_avx256_ok(void) {
   avx256Type t;
   takesAvx256(t);
 }
 
-__attribute__((target("avx512f"))) void call_avx512_ok(void) {
+// Option -mno-evex512 affects target attributes. To retain the 512-bit capability, an explict "evex512" must be added together.
+__attribute__((target("avx512f,evex512"))) void call_avx512_ok1(void) {
   avx512fType t;
-  takesAvx512(t);
+  takesAvx512_2(t);
 }
+
+__attribute__((target("avx512f"))) void call_avx512_ok2(void) {
+  avx512fType t;
+  takesAvx512(t); // avx512-256-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
+}
+#endif

diff  --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 385176d2923e4e5..a6ecedbb8a58e7b 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -369,6 +369,11 @@
 // AVXVNNIINT16: "-target-feature" "+avxvnniint16"
 // NO-AVXVNNIINT16: "-target-feature" "-avxvnniint16"
 
+// RUN: %clang --target=i386 -mevex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=EVEX512 %s
+// RUN: %clang --target=i386 -mno-evex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-EVEX512 %s
+// EVEX512: "-target-feature" "+evex512"
+// NO-EVEX512: "-target-feature" "-evex512"
+
 // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
 // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
 // CRC32: "-target-feature" "+crc32"

diff  --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 7a9c8eb0f8dddc7..36d4af59d4c66f6 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -64,6 +64,7 @@
 // AVX512F: #define __AVX2__ 1
 // AVX512F: #define __AVX512F__ 1
 // AVX512F: #define __AVX__ 1
+// AVX512F: #define __EVEX512__ 1
 // AVX512F: #define __SSE2_MATH__ 1
 // AVX512F: #define __SSE2__ 1
 // AVX512F: #define __SSE3__ 1
@@ -79,6 +80,7 @@
 // AVX512CD: #define __AVX512CD__ 1
 // AVX512CD: #define __AVX512F__ 1
 // AVX512CD: #define __AVX__ 1
+// AVX512CD: #define __EVEX512__ 1
 // AVX512CD: #define __SSE2_MATH__ 1
 // AVX512CD: #define __SSE2__ 1
 // AVX512CD: #define __SSE3__ 1
@@ -94,6 +96,7 @@
 // AVX512ER: #define __AVX512ER__ 1
 // AVX512ER: #define __AVX512F__ 1
 // AVX512ER: #define __AVX__ 1
+// AVX512ER: #define __EVEX512__ 1
 // AVX512ER: #define __SSE2_MATH__ 1
 // AVX512ER: #define __SSE2__ 1
 // AVX512ER: #define __SSE3__ 1
@@ -109,6 +112,7 @@
 // AVX512PF: #define __AVX512F__ 1
 // AVX512PF: #define __AVX512PF__ 1
 // AVX512PF: #define __AVX__ 1
+// AVX512PF: #define __EVEX512__ 1
 // AVX512PF: #define __SSE2_MATH__ 1
 // AVX512PF: #define __SSE2__ 1
 // AVX512PF: #define __SSE3__ 1
@@ -124,6 +128,7 @@
 // AVX512DQ: #define __AVX512DQ__ 1
 // AVX512DQ: #define __AVX512F__ 1
 // AVX512DQ: #define __AVX__ 1
+// AVX512DQ: #define __EVEX512__ 1
 // AVX512DQ: #define __SSE2_MATH__ 1
 // AVX512DQ: #define __SSE2__ 1
 // AVX512DQ: #define __SSE3__ 1
@@ -139,6 +144,7 @@
 // AVX512BW: #define __AVX512BW__ 1
 // AVX512BW: #define __AVX512F__ 1
 // AVX512BW: #define __AVX__ 1
+// AVX512BW: #define __EVEX512__ 1
 // AVX512BW: #define __SSE2_MATH__ 1
 // AVX512BW: #define __SSE2__ 1
 // AVX512BW: #define __SSE3__ 1
@@ -154,6 +160,7 @@
 // AVX512VL: #define __AVX512F__ 1
 // AVX512VL: #define __AVX512VL__ 1
 // AVX512VL: #define __AVX__ 1
+// AVX512VL: #define __EVEX512__ 1
 // AVX512VL: #define __SSE2_MATH__ 1
 // AVX512VL: #define __SSE2__ 1
 // AVX512VL: #define __SSE3__ 1
@@ -168,6 +175,7 @@
 // AVX512F2: #define __AVX2__ 1
 // AVX512F2-NOT: #define __AVX512F__ 1
 // AVX512F2-NOT: #define __AVX512PF__ 1
+// AVX512F2-NOT: #define __EVEX512__ 1
 // AVX512F2: #define __AVX__ 1
 // AVX512F2: #define __SSE2_MATH__ 1
 // AVX512F2: #define __SSE2__ 1
@@ -184,6 +192,7 @@
 // AVX512IFMA: #define __AVX512F__ 1
 // AVX512IFMA: #define __AVX512IFMA__ 1
 // AVX512IFMA: #define __AVX__ 1
+// AVX512IFMA: #define __EVEX512__ 1
 // AVX512IFMA: #define __SSE2_MATH__ 1
 // AVX512IFMA: #define __SSE2__ 1
 // AVX512IFMA: #define __SSE3__ 1
@@ -200,6 +209,7 @@
 // AVX512VBMI: #define __AVX512F__ 1
 // AVX512VBMI: #define __AVX512VBMI__ 1
 // AVX512VBMI: #define __AVX__ 1
+// AVX512VBMI: #define __EVEX512__ 1
 // AVX512VBMI: #define __SSE2_MATH__ 1
 // AVX512VBMI: #define __SSE2__ 1
 // AVX512VBMI: #define __SSE3__ 1
@@ -216,6 +226,7 @@
 // AVX512BITALG: #define __AVX512BW__ 1
 // AVX512BITALG: #define __AVX512F__ 1
 // AVX512BITALG: #define __AVX__ 1
+// AVX512BITALG: #define __EVEX512__ 1
 // AVX512BITALG: #define __SSE2_MATH__ 1
 // AVX512BITALG: #define __SSE2__ 1
 // AVX512BITALG: #define __SSE3__ 1
@@ -230,6 +241,7 @@
 
 // AVX512VBMINOAVX512BW-NOT: #define __AVX512BW__ 1
 // AVX512VBMINOAVX512BW-NOT: #define __AVX512VBMI__ 1
+// AVX512VBMINOAVX512BW: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMI2 %s
 
@@ -238,6 +250,7 @@
 // AVX512VBMI2: #define __AVX512F__ 1
 // AVX512VBMI2: #define __AVX512VBMI2__ 1
 // AVX512VBMI2: #define __AVX__ 1
+// AVX512VBMI2: #define __EVEX512__ 1
 // AVX512VBMI2: #define __SSE2_MATH__ 1
 // AVX512VBMI2: #define __SSE2__ 1
 // AVX512VBMI2: #define __SSE3__ 1
@@ -251,11 +264,13 @@
 
 // AVX512VBMI2NOAVX512BW-NOT: #define __AVX512BW__ 1
 // AVX512VBMI2NOAVX512BW-NOT: #define __AVX512VBMI2__ 1
+// AVX512VBMI2NOAVX512BW: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALGNOAVX512BW %s
 
 // AVX512BITALGNOAVX512BW-NOT: #define __AVX512BITALG__ 1
 // AVX512BITALGNOAVX512BW-NOT: #define __AVX512BW__ 1
+// AVX512BITALGNOAVX512BW: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -msse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SSE42POPCNT %s
 
@@ -467,24 +482,29 @@
 // AVX512BF16: #define __AVX512BF16__ 1
 // AVX512BF16: #define __AVX512BW__ 1
 // AVX512BF16-NOT: #define __AVX512VL__ 1
+// AVX512BF16: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512BW %s
 
 // AVX512BF16_NOAVX512BW-NOT: #define __AVX512BF16__ 1
+// AVX512BF16_NOAVX512BW: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512VL %s
 
 // AVX512BF16_NOAVX512VL: #define __AVX512BF16__ 1
+// AVX512BF16_NOAVX512VL: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vp2intersect -x c -E -dM -o - %s | FileCheck  -check-prefix=VP2INTERSECT %s
 
 // VP2INTERSECT: #define __AVX512F__ 1
 // VP2INTERSECT: #define __AVX512VP2INTERSECT__ 1
+// VP2INTERSECT: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vp2intersect -x c -E -dM -o - %s | FileCheck  -check-prefix=NOVP2INTERSECT %s
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vp2intersect -mno-avx512f -x c -E -dM -o - %s | FileCheck  -check-prefix=NOVP2INTERSECT %s
 
 // NOVP2INTERSECT-NOT: #define __AVX512VP2INTERSECT__ 1
+// NOVP2INTERSECT-NOT: #define __EVEX512__ 1
 
 
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mkl -x c -E -dM -o - %s | FileCheck  -check-prefix=KEYLOCKER %s
@@ -591,21 +611,37 @@
 // AVX512FP16: #define __AVX512DQ__ 1
 // AVX512FP16: #define __AVX512FP16__ 1
 // AVX512FP16: #define __AVX512VL__ 1
+// AVX512FP16: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512VL %s
 
 // AVX512FP16NOAVX512VL-NOT: #define __AVX512FP16__ 1
 // AVX512FP16NOAVX512VL-NOT: #define __AVX512VL__ 1
+// AVX512FP16NOAVX512VL: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512BW %s
 
 // AVX512FP16NOAVX512BW-NOT: #define __AVX512BW__ 1
 // AVX512FP16NOAVX512BW-NOT: #define __AVX512FP16__ 1
+// AVX512FP16NOAVX512BW: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512DQ %s
 
 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
+// AVX512FP16NOAVX512DQ: #define __EVEX512__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
+// NOEVEX512-NOT: #define __AVX512F__ 1
+// NOEVEX512-NOT: #define __EVEX512__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
+// AVX512NOEVEX512: #define __AVX512F__ 1
+// AVX512NOEVEX512-NOT: #define __EVEX512__ 1
 
 // RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mcmpccxadd -x c -E -dM -o - %s | FileCheck  -check-prefix=CMPCCXADD %s
 

diff  --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 817db0f69bc8636..85ff6996d335ae7 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -240,6 +240,7 @@ X86_FEATURE       (SHA512,          "sha512")
 X86_FEATURE       (SM3,             "sm3")
 X86_FEATURE       (SM4,             "sm4")
 X86_FEATURE       (AVXVNNIINT16,    "avxvnniint16")
+X86_FEATURE       (EVEX512,         "evex512")
 // These features aren't really CPU features, but the frontend can set them.
 X86_FEATURE       (RETPOLINE_EXTERNAL_THUNK,    "retpoline-external-thunk")
 X86_FEATURE       (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 1a9ce08f08bf297..c0f30a62b8bccc3 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2030,6 +2030,17 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
               "' does not apply to function return values",
           V);
 
+  unsigned MaxParameterWidth = 0;
+  auto GetMaxParameterWidth = [&MaxParameterWidth](Type *Ty) {
+    if (Ty->isVectorTy()) {
+      if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+        unsigned Size = VT->getPrimitiveSizeInBits().getFixedValue();
+        if (Size > MaxParameterWidth)
+          MaxParameterWidth = Size;
+      }
+    }
+  };
+  GetMaxParameterWidth(FT->getReturnType());
   verifyParameterAttrs(RetAttrs, FT->getReturnType(), V);
 
   // Verify parameter attributes.
@@ -2048,6 +2059,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
     }
 
     verifyParameterAttrs(ArgAttrs, Ty, V);
+    GetMaxParameterWidth(Ty);
 
     if (ArgAttrs.hasAttribute(Attribute::Nest)) {
       Check(!SawNest, "More than one parameter has attribute nest!", V);
@@ -2203,6 +2215,16 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
       CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V);
   }
 
+  // Check EVEX512 feature.
+  if (MaxParameterWidth >= 512 && Attrs.hasFnAttr("target-features")) {
+    Triple T(M.getTargetTriple());
+    if (T.isX86()) {
+      StringRef TF = Attrs.getFnAttr("target-features").getValueAsString();
+      Check(!TF.contains("+avx512f") || !TF.contains("-evex512"),
+            "512-bit vector arguments require 'evex512' for AVX512", V);
+    }
+  }
+
   checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-prefix", V);
   checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-entry", V);
   checkUnsignedBaseTenFuncAttr(Attrs, "warn-stack-size", V);

diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 6af3ebb2feaec92..be167d674619c86 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -285,6 +285,7 @@ class X86MCCodeEmitter : public MCCodeEmitter {
                             SmallVectorImpl<char> &CB) const;
 
   PrefixKind emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+                                 const MCSubtargetInfo &STI,
                                  SmallVectorImpl<char> &CB) const;
 
   void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI,
@@ -841,7 +842,7 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
   // REX prefix is optional, but if used must be immediately before the opcode
   // Encoding type for this instruction.
   return (TSFlags & X86II::EncodingMask)
-             ? emitVEXOpcodePrefix(MemoryOperand, MI, CB)
+             ? emitVEXOpcodePrefix(MemoryOperand, MI, STI, CB)
              : emitOpcodePrefix(MemoryOperand, MI, STI, CB);
 }
 
@@ -860,6 +861,7 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
 /// \returns the used prefix.
 PrefixKind
 X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+                                      const MCSubtargetInfo &STI,
                                       SmallVectorImpl<char> &CB) const {
   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
   uint64_t TSFlags = Desc.TSFlags;
@@ -919,6 +921,9 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
 
   Prefix.setL(TSFlags & X86II::VEX_L);
   Prefix.setL2(TSFlags & X86II::EVEX_L2);
+  if ((TSFlags & X86II::EVEX_L2) && STI.hasFeature(X86::FeatureAVX512) &&
+      !STI.hasFeature(X86::FeatureEVEX512))
+    report_fatal_error("ZMM registers are not supported without EVEX512");
   switch (TSFlags & X86II::OpPrefixMask) {
   case X86II::PD:
     Prefix.setPP(0x1); // 66

diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 9519608ac022f1f..2e1ec745dbb113b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -397,6 +397,18 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
   if (CPU.empty())
     CPU = "generic";
 
+  size_t posNoEVEX512 = FS.rfind("-evex512");
+  // Make sure we won't be cheated by "-avx512fp16".
+  size_t posNoAVX512F = FS.endswith("-avx512f") ? FS.size() - 8
+                                                : FS.rfind("-avx512f,");
+  size_t posEVEX512 = FS.rfind("+evex512");
+  size_t posAVX512F = FS.rfind("+avx512"); // Any AVX512XXX will enable AVX512F.
+
+  if (posAVX512F != StringRef::npos &&
+      (posNoAVX512F == StringRef::npos || posNoAVX512F < posAVX512F))
+    if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
+      ArchFS += ",+evex512";
+
   return createX86MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS);
 }
 

diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index de034fa2c3227bf..64f91ae90e2b0ce 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -119,6 +119,8 @@ def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
                        "Support 16-bit floating point conversion instructions",
                        [FeatureAVX]>;
+def FeatureEVEX512  : SubtargetFeature<"evex512", "HasEVEX512", "true",
+                        "Support ZMM and 64-bit mask instructions">;
 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                                       "Enable AVX-512 instructions",
                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
@@ -817,6 +819,7 @@ def ProcessorFeatures {
   ];
 
   list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
+    FeatureEVEX512,
     FeatureBWI,
     FeatureCDI,
     FeatureDQI,
@@ -940,6 +943,7 @@ def ProcessorFeatures {
                                                   FeatureXSAVES,
                                                   FeatureCLFLUSHOPT,
                                                   FeatureAVX512,
+                                                  FeatureEVEX512,
                                                   FeatureCDI,
                                                   FeatureDQI,
                                                   FeatureBWI,
@@ -982,6 +986,7 @@ def ProcessorFeatures {
 
   // Cannonlake
   list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
+                                                  FeatureEVEX512,
                                                   FeatureCDI,
                                                   FeatureDQI,
                                                   FeatureBWI,
@@ -1262,6 +1267,7 @@ def ProcessorFeatures {
                                         FeatureF16C,
                                         FeatureFSGSBase,
                                         FeatureAVX512,
+                                        FeatureEVEX512,
                                         FeatureERI,
                                         FeatureCDI,
                                         FeaturePFI,
@@ -1471,6 +1477,7 @@ def ProcessorFeatures {
     !listconcat(ZN2Features, ZN3AdditionalFeatures);
   list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
   list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
+                                                  FeatureEVEX512,
                                                   FeatureCDI,
                                                   FeatureDQI,
                                                   FeatureBWI,

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 08e6e4e0627b772..a20fa6a0c3b6c63 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -903,6 +903,7 @@ def NoAVX        : Predicate<"!Subtarget->hasAVX()">;
 def HasAVX       : Predicate<"Subtarget->hasAVX()">;
 def HasAVX2      : Predicate<"Subtarget->hasAVX2()">;
 def HasAVX1Only  : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
+def HasEVEX512   : Predicate<"Subtarget->hasEVEX512()">;
 def HasAVX512    : Predicate<"Subtarget->hasAVX512()">;
 def UseAVX       : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
 def UseAVX2      : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index bd29e9317ca5ea0..3504ca2b5743f88 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1030,7 +1030,14 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
       VirtReg, Order, Hints, MF, VRM, Matrix);
 
-  if (RC.getID() != X86::TILERegClassID)
+  unsigned ID = RC.getID();
+  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+  if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) &&
+      Subtarget.hasAVX512() && !Subtarget.hasEVEX512())
+    report_fatal_error(
+        "64-bit mask registers are not supported without EVEX512");
+
+  if (ID != X86::TILERegClassID)
     return BaseImplRetVal;
 
   ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);

diff  --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 319b3c7d1394762..88e9c3a705191b3 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -268,6 +268,19 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
   if (!FS.empty())
     FullFS = (Twine(FullFS) + "," + FS).str();
 
+  // Attach EVEX512 feature when we have AVX512 features and EVEX512 is not set.
+  size_t posNoEVEX512 = FS.rfind("-evex512");
+  // Make sure we won't be cheated by "-avx512fp16".
+  size_t posNoAVX512F = FS.endswith("-avx512f") ? FS.size() - 8
+                                                : FS.rfind("-avx512f,");
+  size_t posEVEX512 = FS.rfind("+evex512");
+  size_t posAVX512F = FS.rfind("+avx512"); // Any AVX512XXX will enable AVX512F.
+
+  if (posAVX512F != StringRef::npos &&
+      (posNoAVX512F == StringRef::npos || posNoAVX512F < posAVX512F))
+    if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
+      FullFS += ",+evex512";
+
   // Parse features string and set the CPU.
   ParseSubtargetFeatures(CPU, TuneCPU, FullFS);
 

diff  --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index d69b8fd3e48b880..a458b5f9ec8fbb9 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -264,7 +264,8 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
   // disable them in the legalizer.
   bool useAVX512Regs() const {
-    return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
+    return hasAVX512() && hasEVEX512() &&
+           (canExtendTo512DQ() || RequiredVectorWidth > 256);
   }
 
   bool useLight256BitInstructions() const {

diff  --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 274049c5b456937..b9908dd2629ff34 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -72,7 +72,7 @@ constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
 constexpr FeatureBitset FeaturesX86_64_V3 =
     FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
     FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
-constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 |
+constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureEVEX512 |
                                             FeatureAVX512BW | FeatureAVX512CD |
                                             FeatureAVX512DQ | FeatureAVX512VL;
 
@@ -96,8 +96,8 @@ constexpr FeatureBitset FeaturesBroadwell =
 // Intel Knights Landing and Knights Mill
 // Knights Landing has feature parity with Broadwell.
 constexpr FeatureBitset FeaturesKNL =
-    FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureAVX512CD |
-    FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
+    FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
+    FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
 constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
 
 // Intel Skylake processors.
@@ -107,9 +107,9 @@ constexpr FeatureBitset FeaturesSkylakeClient =
 // SkylakeServer inherits all SkylakeClient features except SGX.
 // FIXME: That doesn't match gcc.
 constexpr FeatureBitset FeaturesSkylakeServer =
-    (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureAVX512CD |
-    FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureCLWB |
-    FeaturePKU;
+    (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureEVEX512 |
+    FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL |
+    FeatureCLWB | FeaturePKU;
 constexpr FeatureBitset FeaturesCascadeLake =
     FeaturesSkylakeServer | FeatureAVX512VNNI;
 constexpr FeatureBitset FeaturesCooperLake =
@@ -117,9 +117,9 @@ constexpr FeatureBitset FeaturesCooperLake =
 
 // Intel 10nm processors.
 constexpr FeatureBitset FeaturesCannonlake =
-    FeaturesSkylakeClient | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
-    FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
-    FeaturePKU | FeatureSHA;
+    FeaturesSkylakeClient | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
+    FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
+    FeatureAVX512VBMI | FeaturePKU | FeatureSHA;
 constexpr FeatureBitset FeaturesICLClient =
     FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 |
     FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureGFNI | FeatureRDPID |
@@ -230,11 +230,11 @@ static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
                                                 FeatureINVPCID | FeaturePKU |
                                                 FeatureVAES | FeatureVPCLMULQDQ;
 static constexpr FeatureBitset FeaturesZNVER4 =
-    FeaturesZNVER3 | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
-    FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
-    FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512BITALG |
-    FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI |
-    FeatureSHSTK;
+    FeaturesZNVER3 | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
+    FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
+    FeatureAVX512VBMI | FeatureAVX512VBMI2 | FeatureAVX512VNNI |
+    FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
+    FeatureGFNI | FeatureSHSTK;
 
 // D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from
 // X86TargetParser.def to here. They are assigned by following ways:
@@ -542,6 +542,7 @@ constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3;
 constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1;
 constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2;
 constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesEVEX512 = {};
 constexpr FeatureBitset ImpliedFeaturesAVX512F =
     FeatureAVX2 | FeatureF16C | FeatureFMA;
 

diff  --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
index 988097ba7eb791e..4988fc35b10eef6 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s
 
 ; 256-bit
 

diff  --git a/llvm/test/CodeGen/X86/avx512vl-arith.ll b/llvm/test/CodeGen/X86/avx512vl-arith.ll
index d5953f2d372656e..1006c5625e26c96 100644
--- a/llvm/test/CodeGen/X86/avx512vl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-arith.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s
 
 ; 256-bit
 


        


More information about the cfe-commits mailing list