[clang] b110b7d - [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr (#161056)

via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 14 06:35:07 PDT 2025


Author: NagaChaitanya Vellanki
Date: 2025-10-14T13:35:03Z
New Revision: b110b7d30ac1ec996f2de39f1baddb15e654b5ab

URL: https://github.com/llvm/llvm-project/commit/b110b7d30ac1ec996f2de39f1baddb15e654b5ab
DIFF: https://github.com/llvm/llvm-project/commit/b110b7d30ac1ec996f2de39f1baddb15e654b5ab.diff

LOG: [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX/AVX512 IFMA madd52 intrinsics to be used in constexpr (#161056)

Resolves #160498

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/lib/Headers/avx512ifmaintrin.h
    clang/lib/Headers/avx512ifmavlintrin.h
    clang/lib/Headers/avxifmaintrin.h
    clang/test/CodeGen/X86/avx512ifma-builtins.c
    clang/test/CodeGen/X86/avx512ifmavl-builtins.c
    clang/test/CodeGen/X86/avxifma-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index a55dff276dd98..006a45347ff1a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2137,24 +2137,18 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
   def movdqa64store256_mask : X86Builtin<"void(_Vector<4, long long int *>, _Vector<4, long long int>, unsigned char)">;
 }
 
-let Features = "avx512ifma", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512ifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def vpmadd52huq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
   def vpmadd52luq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
 }
 
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpmadd52huq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def vpmadd52luq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
 }
 
-let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512ifma,avx512vl|avxifma", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def vpmadd52huq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
   def vpmadd52luq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
 }
 

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5f0a77c125b85..2d3cb6a68d7e2 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3818,6 +3818,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return F;
         });
 
+  case X86::BI__builtin_ia32_vpmadd52luq128:
+  case X86::BI__builtin_ia32_vpmadd52luq256:
+  case X86::BI__builtin_ia32_vpmadd52luq512:
+    return interp__builtin_elementwise_triop(
+        S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+          return A + (B.trunc(52) * C.trunc(52)).zext(64);
+        });
+  case X86::BI__builtin_ia32_vpmadd52huq128:
+  case X86::BI__builtin_ia32_vpmadd52huq256:
+  case X86::BI__builtin_ia32_vpmadd52huq512:
+    return interp__builtin_elementwise_triop(
+        S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+          return A + llvm::APIntOps::mulhu(B.trunc(52), C.trunc(52)).zext(64);
+        });
+
   case X86::BI__builtin_ia32_vpshldd128:
   case X86::BI__builtin_ia32_vpshldd256:
   case X86::BI__builtin_ia32_vpshldd512:

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 59b4f4f6b5782..51c038274fd36 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11974,6 +11974,54 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+
+  case X86::BI__builtin_ia32_vpmadd52luq128:
+  case X86::BI__builtin_ia32_vpmadd52luq256:
+  case X86::BI__builtin_ia32_vpmadd52luq512: {
+    APValue A, B, C;
+    if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
+        !EvaluateAsRValue(Info, E->getArg(1), B) ||
+        !EvaluateAsRValue(Info, E->getArg(2), C))
+      return false;
+
+    unsigned ALen = A.getVectorLength();
+    SmallVector<APValue, 4> ResultElements;
+    ResultElements.reserve(ALen);
+
+    for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) {
+      APInt AElt = A.getVectorElt(EltNum).getInt();
+      APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52);
+      APInt CElt = C.getVectorElt(EltNum).getInt().trunc(52);
+      APSInt ResElt(AElt + (BElt * CElt).zext(64), false);
+      ResultElements.push_back(APValue(ResElt));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+  case X86::BI__builtin_ia32_vpmadd52huq128:
+  case X86::BI__builtin_ia32_vpmadd52huq256:
+  case X86::BI__builtin_ia32_vpmadd52huq512: {
+    APValue A, B, C;
+    if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
+        !EvaluateAsRValue(Info, E->getArg(1), B) ||
+        !EvaluateAsRValue(Info, E->getArg(2), C))
+      return false;
+
+    unsigned ALen = A.getVectorLength();
+    SmallVector<APValue, 4> ResultElements;
+    ResultElements.reserve(ALen);
+
+    for (unsigned EltNum = 0; EltNum < ALen; EltNum += 1) {
+      APInt AElt = A.getVectorElt(EltNum).getInt();
+      APInt BElt = B.getVectorElt(EltNum).getInt().trunc(52);
+      APInt CElt = C.getVectorElt(EltNum).getInt().trunc(52);
+      APSInt ResElt(AElt + llvm::APIntOps::mulhu(BElt, CElt).zext(64), false);
+      ResultElements.push_back(APValue(ResElt));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+
   case clang::X86::BI__builtin_ia32_vprotbi:
   case clang::X86::BI__builtin_ia32_vprotdi:
   case clang::X86::BI__builtin_ia32_vprotqi:

diff  --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h
index f01b322ce7787..625a8ff66dc60 100644
--- a/clang/lib/Headers/avx512ifmaintrin.h
+++ b/clang/lib/Headers/avx512ifmaintrin.h
@@ -15,54 +15,53 @@
 #define __IFMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS                                                     \
+  constexpr                                                                    \
+      __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \
+                     __min_vector_width__(512)))
+#else
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"),     \
                  __min_vector_width__(512)))
+#endif
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y,
-                                                (__v8di) __Z);
+_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y,
+                                                (__v8di)__Z);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y),
-                                   (__v8di)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64(
+    __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
-                                   (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64(
+    __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
+      (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y,
-                                                (__v8di) __Z);
+_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y,
+                                                (__v8di)__Z);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y),
-                                   (__v8di)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64(
+    __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
-                                   (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64(
+    __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
+      (__v8di)_mm512_setzero_si512());
 }
 
 #undef __DEFAULT_FN_ATTRS

diff  --git a/clang/lib/Headers/avx512ifmavlintrin.h b/clang/lib/Headers/avx512ifmavlintrin.h
index a72b56113a12b..c4449c7ece9ff 100644
--- a/clang/lib/Headers/avx512ifmavlintrin.h
+++ b/clang/lib/Headers/avx512ifmavlintrin.h
@@ -8,13 +8,24 @@
  *===-----------------------------------------------------------------------===
  */
 #ifndef __IMMINTRIN_H
-#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
+#error                                                                         \
+    "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
 #endif
 
 #ifndef __IFMAVLINTRIN_H
 #define __IFMAVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128                                                  \
+  constexpr __attribute__((__always_inline__, __nodebug__,                     \
+                           __target__("avx512ifma,avx512vl"),                  \
+                           __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  constexpr __attribute__((__always_inline__, __nodebug__,                     \
+                           __target__("avx512ifma,avx512vl"),                  \
+                           __min_vector_width__(256)))
+#else
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
                  __target__("avx512ifma,avx512vl"),                            \
@@ -24,6 +35,8 @@
                  __target__("avx512ifma,avx512vl"),                            \
                  __min_vector_width__(256)))
 
+#endif
+
 #define _mm_madd52hi_epu64(X, Y, Z)                                            \
   ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y),            \
                                           (__v2di)(Z)))
@@ -41,70 +54,57 @@
                                           (__v4di)(Z)))
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52hi_epu64(__W, __X, __Y),
-                                      (__v2di)__W);
+_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
-                                      (__v2di)_mm_setzero_si128());
+_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
+      (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y),
-                                   (__v4di)__W);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64(
+    __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
-                                   (__v4di)_mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64(
+    __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
+      (__v4di)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52lo_epu64(__W, __X, __Y),
-                                      (__v2di)__W);
+_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
-                                      (__v2di)_mm_setzero_si128());
+_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
+      (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y),
-                                   (__v4di)__W);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64(
+    __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
-                                   (__v4di)_mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64(
+    __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
+      (__v4di)_mm256_setzero_si256());
 }
 
-
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
 

diff  --git a/clang/lib/Headers/avxifmaintrin.h b/clang/lib/Headers/avxifmaintrin.h
index 5c782d2a5b865..a2ef601913431 100644
--- a/clang/lib/Headers/avxifmaintrin.h
+++ b/clang/lib/Headers/avxifmaintrin.h
@@ -15,12 +15,21 @@
 #define __AVXIFMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128                                                  \
+  constexpr __attribute__((__always_inline__, __nodebug__,                     \
+                           __target__("avxifma"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  constexpr __attribute__((__always_inline__, __nodebug__,                     \
+                           __target__("avxifma"), __min_vector_width__(256)))
+#else
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
                  __min_vector_width__(256)))
+#endif
 
 // must vex-encoding
 

diff  --git a/clang/test/CodeGen/X86/avx512ifma-builtins.c b/clang/test/CodeGen/X86/avx512ifma-builtins.c
index eebefb0bad4ab..f90697e3ab9b9 100644
--- a/clang/test/CodeGen/X86/avx512ifma-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifma-builtins.c
@@ -8,45 +8,230 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
 
-
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m512i test_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
   // CHECK-LABEL: test_mm512_madd52hi_epu64
   // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
-  return _mm512_madd52hi_epu64(__X, __Y, __Z); 
+  return _mm512_madd52hi_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+                              (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+                          100, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+                              (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+                                                0, 0, 0, 0},
+                              (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+                                                0, 0, 0, 0}),
+                          0xFFFFFFFFFFFFEull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+                              (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+                              (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull},
+                              (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull}),
+                          4503599627370495ull, 4503599627370496ull,
+                          4503599627370497ull, 4503599627370498ull,
+                          4503599627370499ull, 4503599627370500ull,
+                          4503599627370501ull, 4503599627370502ull));
+
 __m512i test_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_mask_madd52hi_epu64
   // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-  return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y); 
+  return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64(
+                              (__m512i)(__v8du){111, 222, 333, 444, 555, 666,
+                                                777, 888},
+                              0x00,
+                              (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80}),
+                          111, 222, 333, 444, 555, 666, 777, 888));
+
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64(
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80},
+                              0xFF,
+                              (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+                                                700, 800},
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80}),
+                          10, 20, 30, 40, 50, 60, 70, 80));
+
 __m512i test_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
   // CHECK-LABEL: test_mm512_maskz_madd52hi_epu64
   // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-  return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z); 
+  return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64(
+                              0x00,
+                              (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80},
+                              (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+                                                700, 800}),
+                          0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64(
+                              0xFF,
+                              (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80},
+                              (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+                                                700, 800}),
+                          1, 2, 3, 4, 5, 6, 7, 8));
+
 __m512i test_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
   // CHECK-LABEL: test_mm512_madd52lo_epu64
   // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
-  return _mm512_madd52lo_epu64(__X, __Y, __Z); 
+  return _mm512_madd52lo_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+                          50, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){20, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){30, 0, 0, 0, 0, 0, 0, 0}),
+                          700, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+                                                0, 0, 0, 0},
+                              (__m512i)(__v8du){1, 0, 0, 0, 0, 0, 0, 0}),
+                          0xFFFFFFFFFFFFFull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){0x1F000000000000ull, 0, 0, 0,
+                                                0, 0, 0, 0},
+                              (__m512i)(__v8du){2, 0, 0, 0, 0, 0, 0, 0}),
+                          0xE000000000000ull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80},
+                              (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+                          21, 62, 123, 204, 305, 426, 567, 728));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+                                                0, 0, 0, 0},
+                              (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+                          4503599627370545ull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80},
+                              (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+                                                700, 800},
+                              (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+                          210, 620, 1230, 2040, 3050, 4260, 5670, 7280));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){0x1F000000000000ull,
+                                                0x1F000000000000ull, 0, 0, 0,
+                                                0, 0, 0},
+                              (__m512i)(__v8du){2, 3, 0, 0, 0, 0, 0, 0}),
+                          0xE000000000000ull, 0xD000000000000ull, 0, 0, 0, 0,
+                          0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+                              (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+                              (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull,
+                                                0xFFFFFFFFFFFFFull},
+                              (__m512i)(__v8du){1, 1, 1, 1, 1, 1, 1, 1}),
+                          0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+                          0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+                          0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+                          0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull));
+
 __m512i test_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_mask_madd52lo_epu64
   // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-  return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y); 
+  return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64(
+                              (__m512i)(__v8du){111, 222, 333, 444, 555, 666,
+                                                777, 888},
+                              0x00,
+                              (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80}),
+                          111, 222, 333, 444, 555, 666, 777, 888));
+
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64(
+                              (__m512i)(__v8du){1000, 2000, 3000, 4000, 5000,
+                                                6000, 7000, 8000},
+                              0xFF,
+                              (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+                                                700, 800},
+                              (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80,
+                                                90}),
+                          3000, 8000, 15000, 24000, 35000, 48000, 63000,
+                          80000));
+
 __m512i test_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
   // CHECK-LABEL: test_mm512_maskz_madd52lo_epu64
   // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-  return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z); 
+  return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
 }
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64(
+                              0x00,
+                              (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+                              (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+                                                80},
+                              (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+                          0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64(
+                              0xFF,
+                              (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+                                                700, 800},
+                              (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80,
+                                                90},
+                              (__m512i)(__v8du){30, 40, 50, 60, 70, 80, 90,
+                                                100}),
+                          700, 1400, 2300, 3400, 4700, 6200, 7900, 9800));

diff  --git a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
index 89108fc037520..1cbb5807a660e 100644
--- a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
@@ -8,85 +8,241 @@
 // RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
 // RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
 
-
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
   // CHECK-LABEL: test_mm_madd52hi_epu64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
-  return _mm_madd52hi_epu64(__X, __Y, __Z); 
+  return _mm_madd52hi_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+                              (__m128i)((__v2du){100, 0}),
+                              (__m128i)((__v2du){10, 0}),
+                              (__m128i)((__v2du){5, 0})),
+                          100, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+                          0xFFFFFFFFFFFFEull, 0));
+
 __m128i test_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
   // CHECK-LABEL: test_mm_mask_madd52hi_epu64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
-  return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y); 
+  return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){111, 222}),
+                                                   0x0,
+                                                   (__m128i)((__v2du){1, 2}),
+                                                   (__m128i)((__v2du){10, 20})),
+                          111, 222));
+
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){10, 20}),
+                                                   0x2,
+                                                   (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+                                                   (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL})),
+                          10, 0x100000000014ULL));
+
 __m128i test_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
   // CHECK-LABEL: test_mm_maskz_madd52hi_epu64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
-  return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z); 
+  return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x3,
+                                                    (__m128i)((__v2du){1, 2}),
+                                                    (__m128i)((__v2du){10, 20}),
+                                                    (__m128i)((__v2du){100, 200})),
+                          1, 2));
+
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x1,
+                                                    (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+                                                    (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+                                                    (__m128i)((__v2du){0, 0})),
+                          0x1000000000000ULL, 0));
+
 __m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
   // CHECK-LABEL: test_mm256_madd52hi_epu64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
-  return _mm256_madd52hi_epu64(__X, __Y, __Z); 
+  return _mm256_madd52hi_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+                              (__m256i)((__v4du){100, 200, 300, 400}),
+                              (__m256i)((__v4du){10, 20, 30, 40}),
+                              (__m256i)((__v4du){5, 6, 7, 8})),
+                          100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+                                                 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+                                                 0})),
+                          0xFFFFFFFFFFFFEull, 0, 0, 0));
+
 __m256i test_mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
   // CHECK-LABEL: test_mm256_mask_madd52hi_epu64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
-  return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y); 
+  return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){111, 222, 333, 444}),
+                                                      0x0,
+                                                      (__m256i)((__v4du){1, 2, 3, 4}),
+                                                      (__m256i)((__v4du){10, 20, 30, 40})),
+                          111, 222, 333, 444));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){10, 20, 30, 40}),
+                                                      0xA,
+                                                      (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+                                                                         0x1000000000000ULL, 0x1000000000000ULL}),
+                                                      (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+                                                                         0x1000000000000ULL, 0x1000000000000ULL})),
+                          10, 0x100000000014ULL, 30, 0x100000000028ULL));
+
 __m256i test_mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
   // CHECK-LABEL: test_mm256_maskz_madd52hi_epu64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
-  return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z); 
+  return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0xF,
+                                                       (__m256i)((__v4du){1, 2, 3, 4}),
+                                                       (__m256i)((__v4du){10, 20, 30, 40}),
+                                                       (__m256i)((__v4du){100, 200, 300, 400})),
+                          1, 2, 3, 4));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0x5,
+                                                       (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+                                                                          0x1000000000000ULL, 0x1000000000000ULL}),
+                                                       (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+                                                                          0x1000000000000ULL, 0x1000000000000ULL}),
+                                                       (__m256i)((__v4du){0, 0, 0, 0})),
+                          0x1000000000000ULL, 0, 0x1000000000000ULL, 0));
+
 __m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
   // CHECK-LABEL: test_mm_madd52lo_epu64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
-  return _mm_madd52lo_epu64(__X, __Y, __Z); 
+  return _mm_madd52lo_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){10, 0}),
+                              (__m128i)((__v2du){5, 0})),
+                          50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+                              (__m128i)((__v2du){100, 0}),
+                              (__m128i)((__v2du){20, 0}),
+                              (__m128i)((__v2du){30, 0})),
+                          700, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+                              (__m128i)((__v2du){1, 2}),
+                              (__m128i)((__v2du){10, 20}),
+                              (__m128i)((__v2du){2, 3})),
+                          21, 62));
+
 __m128i test_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
   // CHECK-LABEL: test_mm_mask_madd52lo_epu64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
-  return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y); 
+  return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){1000, 2000}),
+                                                   0x3,
+                                                   (__m128i)((__v2du){100, 200}),
+                                                   (__m128i)((__v2du){20, 30})),
+                          3000, 8000));
+
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){111, 222}),
+                                                   0x0,
+                                                   (__m128i)((__v2du){1, 2}),
+                                                   (__m128i)((__v2du){10, 20})),
+                          111, 222));
+
 __m128i test_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
   // CHECK-LABEL: test_mm_maskz_madd52lo_epu64
   // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
-  return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z); 
+  return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x3,
+                                                    (__m128i)((__v2du){100, 200}),
+                                                    (__m128i)((__v2du){20, 30}),
+                                                    (__m128i)((__v2du){30, 40})),
+                          700, 1400));
+
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x1,
+                                                    (__m128i)((__v2du){100, 0}),
+                                                    (__m128i)((__v2du){20, 0}),
+                                                    (__m128i)((__v2du){30, 0})),
+                          700, 0));
+
 __m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
   // CHECK-LABEL: test_mm256_madd52lo_epu64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
-  return _mm256_madd52lo_epu64(__X, __Y, __Z); 
+  return _mm256_madd52lo_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+                              (__m256i)((__v4du){1, 2, 3, 4}),
+                              (__m256i)((__v4du){10, 20, 30, 40}),
+                              (__m256i)((__v4du){2, 3, 4, 5})),
+                          21, 62, 123, 204));
+
 __m256i test_mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
   // CHECK-LABEL: test_mm256_mask_madd52lo_epu64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
-  return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y); 
+  return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){1000, 2000, 3000, 4000}),
+                                                      0xF,
+                                                      (__m256i)((__v4du){100, 200, 300, 400}),
+                                                      (__m256i)((__v4du){20, 30, 40, 50})),
+                          3000, 8000, 15000, 24000));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){111, 222, 333, 444}),
+                                                      0x0,
+                                                      (__m256i)((__v4du){1, 2, 3, 4}),
+                                                      (__m256i)((__v4du){10, 20, 30, 40})),
+                          111, 222, 333, 444));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){11, 22, 33, 44}),
+                                                      0x5,
+                                                      (__m256i)((__v4du){100, 200, 300, 400}),
+                                                      (__m256i)((__v4du){10, 20, 30, 40})),
+                          1011, 22, 9033, 44));
+
 __m256i test_mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
   // CHECK-LABEL: test_mm256_maskz_madd52lo_epu64
   // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
-  return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z); 
+  return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
 }
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0xF,
+                                                       (__m256i)((__v4du){100, 200, 300, 400}),
+                                                       (__m256i)((__v4du){20, 30, 40, 50}),
+                                                       (__m256i)((__v4du){30, 40, 50, 60})),
+                          700, 1400, 2300, 3400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0x9,
+                                                       (__m256i)((__v4du){100, 200, 300, 400}),
+                                                       (__m256i)((__v4du){10, 20, 30, 40}),
+                                                       (__m256i)((__v4du){5, 10, 15, 20})),
+                          150, 0, 0, 1200));

diff  --git a/clang/test/CodeGen/X86/avxifma-builtins.c b/clang/test/CodeGen/X86/avxifma-builtins.c
index aa151591ed143..70531da02df21 100644
--- a/clang/test/CodeGen/X86/avxifma-builtins.c
+++ b/clang/test/CodeGen/X86/avxifma-builtins.c
@@ -8,8 +8,9 @@
 // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
 // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
 
-
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
+
 
 __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
 // CHECK-LABEL: test_mm_madd52hi_epu64
@@ -17,44 +18,207 @@ __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
   return _mm_madd52hi_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+                              (__m128i)((__v2du){50, 100}),
+                              (__m128i)((__v2du){10, 20}),
+                              (__m128i)((__v2du){5, 6})),
+                          50, 100));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+                          0xFFFFFFFFFFFFEull, 0));
+
 __m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
 // CHECK-LABEL: test_mm256_madd52hi_epu64
 // CHECK:    call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_madd52hi_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+                              (__m256i)((__v4du){100, 200, 300, 400}),
+                              (__m256i)((__v4du){10, 20, 30, 40}),
+                              (__m256i)((__v4du){5, 6, 7, 8})),
+                          100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+                                                 0xFFFFFFFFFFFFFull, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+                                                 0xFFFFFFFFFFFFFull, 0, 0})),
+                          0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0));
+
 __m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
 // CHECK-LABEL: test_mm_madd52lo_epu64
 // CHECK:    call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_madd52lo_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){10, 0}),
+                              (__m128i)((__v2du){5, 0})),
+                          50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+                              (__m128i)((__v2du){1, 2}),
+                              (__m128i)((__v2du){10, 20}),
+                              (__m128i)((__v2du){2, 3})),
+                          21, 62));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+                              (__m128i)((__v2du){1, 0})),
+                          0xFFFFFFFFFFFFFull, 0));
+
 __m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
 // CHECK-LABEL: test_mm256_madd52lo_epu64
 // CHECK:    call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_madd52lo_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+                              (__m256i)((__v4du){1, 2, 3, 4}),
+                              (__m256i)((__v4du){10, 20, 30, 40}),
+                              (__m256i)((__v4du){2, 3, 4, 5})),
+                          21, 62, 123, 204));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+                                                 0}),
+                              (__m256i)((__v4du){1, 0, 0, 0})),
+                          0xFFFFFFFFFFFFFull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0x1F000000000000ull, 0, 0,
+                                                 0}),
+                              (__m256i)((__v4du){2, 0, 0, 0})),
+                          0xE000000000000ull, 0, 0, 0));
+
 __m128i test_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
-// CHECK-LABEL: test_mm_madd52hi_avx_epu64
-// CHECK:    call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK-LABEL: test_mm_madd52hi_avx_epu64
+  // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_madd52hi_avx_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+                              (__m128i)((__v2du){50, 100}),
+                              (__m128i)((__v2du){10, 20}),
+                              (__m128i)((__v2du){5, 6})),
+                          50, 100));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+                              (__m128i)((__v2du){100, 0}),
+                              (__m128i)((__v2du){10, 0}),
+                              (__m128i)((__v2du){5, 0})),
+                          100, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+                          0xFFFFFFFFFFFFEull, 0));
+                        
 __m256i test_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
-// CHECK-LABEL: test_mm256_madd52hi_avx_epu64
-// CHECK:    call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+  // CHECK-LABEL: test_mm256_madd52hi_avx_epu64
+  // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_madd52hi_avx_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+                                                 0xFFFFFFFFFFFFFull, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+                                                 0xFFFFFFFFFFFFFull, 0, 0})),
+                          0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+                              (__m256i)((__v4du){100, 200, 300, 400}),
+                              (__m256i)((__v4du){10, 20, 30, 40}),
+                              (__m256i)((__v4du){5, 6, 7, 8})),
+                          100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+                                                 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+                                                 0})),
+                          0xFFFFFFFFFFFFEull, 0, 0, 0));
+
 __m128i test_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
-// CHECK-LABEL: test_mm_madd52lo_avx_epu64
-// CHECK:    call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK-LABEL: test_mm_madd52lo_avx_epu64
+  // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_madd52lo_avx_epu64(__X, __Y, __Z);
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){10, 0}),
+                              (__m128i)((__v2du){5, 0})),
+                          50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+                              (__m128i)((__v2du){100, 0}),
+                              (__m128i)((__v2du){20, 0}),
+                              (__m128i)((__v2du){30, 0})),
+                          700, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+                              (__m128i)((__v2du){1, 2}),
+                              (__m128i)((__v2du){10, 20}),
+                              (__m128i)((__v2du){2, 3})),
+                          21, 62));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+                              (__m128i)((__v2du){0, 0}),
+                              (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+                              (__m128i)((__v2du){1, 0})),
+                          0xFFFFFFFFFFFFFull, 0));
+
 __m256i test_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
-// CHECK-LABEL: test_mm256_madd52lo_avx_epu64
-// CHECK:    call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+  // CHECK-LABEL: test_mm256_madd52lo_avx_epu64
+  // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_madd52lo_avx_epu64(__X, __Y, __Z);
 }
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+                              (__m256i)((__v4du){1, 2, 3, 4}),
+                              (__m256i)((__v4du){10, 20, 30, 40}),
+                              (__m256i)((__v4du){2, 3, 4, 5})),
+                          21, 62, 123, 204));
+
+
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+                                                 0}),
+                              (__m256i)((__v4du){1, 0, 0, 0})),
+                          0xFFFFFFFFFFFFFull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+                              (__m256i)((__v4du){0, 0, 0, 0}),
+                              (__m256i)((__v4du){0x1F000000000000ull, 0, 0,
+                                                 0}),
+                              (__m256i)((__v4du){2, 0, 0, 0})),
+                          0xE000000000000ull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+                              (__m128i)((__v2du){5, 10}),
+                              (__m128i)((__v2du){100, 200}),
+                              (__m128i)((__v2du){7, 8})),
+                          705, 1610));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+                              (__m256i)((__v4du){1, 2, 3, 4}),
+                              (__m256i)((__v4du){10, 20, 30, 40}),
+                              (__m256i)((__v4du){2, 3, 4, 5})),
+                          21, 62, 123, 204));
+


        


More information about the cfe-commits mailing list