[clang] [clang][x86] Add constexpr support for VNNI intrinsics (PR #190549)

Akash Deo via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 18 07:50:16 PDT 2026


https://github.com/AkashDeoNU updated https://github.com/llvm/llvm-project/pull/190549

>From b5ae019d7989746021e1178d3ab7692eb274af1d Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Sun, 5 Apr 2026 15:17:30 -0500
Subject: [PATCH 01/11] [clang][x86] Add constexpr support for VNNI intrinsics

---
 clang/include/clang/Basic/BuiltinsX86.td      |  24 +-
 clang/lib/AST/ByteCode/InterpBuiltin.cpp      |  91 ++++++
 clang/lib/AST/ExprConstant.cpp                |  80 +++++
 clang/lib/Headers/avx512vlvnniintrin.h        |  90 +++---
 clang/lib/Headers/avx512vnniintrin.h          |  73 ++---
 clang/lib/Headers/avxvnniintrin.h             |  48 +--
 .../test/CodeGen/X86/avx512vlvnni-builtins.c  | 298 ++++++++++++++++++
 clang/test/CodeGen/X86/avx512vnni-builtins.c  | 156 +++++++++
 clang/test/CodeGen/X86/avxvnni-builtins.c     | 246 +++++++++++++++
 9 files changed, 986 insertions(+), 120 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index c8c371625b568..483092d6274d8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1075,51 +1075,51 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
   def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
 }
 
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
 }
 
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
 }
 
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
 }
 
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
 }
 
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b16a34543757b..2b018ec590553 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4441,6 +4441,53 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+// VNNI dot products: lane I of arg 0 accumulates Iters adjacent products of
+// arg 1 (zero-extended bytes or signed words) and arg 2 (always signed).
+static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
+                                      const CallExpr *Call, bool IsDottingWord,
+                                      bool IsSaturating) {
+  const auto *SrcVecT = Call->getArg(0)->getType()->castAs<VectorType>();
+  const auto *OpAVecT = Call->getArg(1)->getType()->castAs<VectorType>();
+  const auto *OpBVecT = Call->getArg(2)->getType()->castAs<VectorType>();
+
+  PrimType SrcElemT = *S.getContext().classify(SrcVecT->getElementType());
+  PrimType OpAElemT = *S.getContext().classify(OpAVecT->getElementType());
+  PrimType OpBElemT = *S.getContext().classify(OpBVecT->getElementType());
+
+  unsigned NumElements = SrcVecT->getNumElements();
+  unsigned Iters = IsDottingWord ? 2 : 4; // inputs folded into each lane
+
+  const Pointer &OpBPtr = S.Stk.pop<Pointer>(); // popped in reverse arg order
+  const Pointer &OpAPtr = S.Stk.pop<Pointer>();
+  const Pointer &SrcPtr = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  for (unsigned I = 0; I < NumElements; ++I) {
+    APSInt Acc;
+    INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem<T>(I).toAPSInt(); });
+    Acc = Acc.sext(64); // widen so the sum cannot wrap before narrowing
+    for (unsigned J = 0; J < Iters; ++J) {
+      APSInt OpA, OpB;
+      INT_TYPE_SWITCH_NO_BOOL(
+          OpAElemT, { OpA = OpAPtr.elem<T>(Iters * I + J).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(
+          OpBElemT, { OpB = OpBPtr.elem<T>(Iters * I + J).toAPSInt(); });
+      OpA = APSInt(IsDottingWord ? OpA.sext(64) : OpA.zext(64), false);
+      OpB = APSInt(OpB.sext(64), false);
+      Acc += OpA * OpB;
+    }
+    // Narrow explicitly on both paths instead of relying on static_cast<T>.
+    if (IsSaturating)
+      Acc = APSInt(Acc.truncSSat(32), false);
+    else
+      Acc = APSInt(Acc.trunc(32), false);
+    INT_TYPE_SWITCH_NO_BOOL(SrcElemT,
+                            { Dst.elem<T>(I) = static_cast<T>(Acc); });
+  }
+  Dst.initializeAllElements();
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -6503,6 +6550,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false);
         },
         /*IsScalar=*/true);
+  case X86::BI__builtin_ia32_vpdpwssd128:
+  case X86::BI__builtin_ia32_vpdpwssd256:
+  case X86::BI__builtin_ia32_vpdpwssd512:
+  case X86::BI__builtin_ia32_vpdpwssds128:
+  case X86::BI__builtin_ia32_vpdpwssds256:
+  case X86::BI__builtin_ia32_vpdpwssds512:
+  case X86::BI__builtin_ia32_vpdpbusds128:
+  case X86::BI__builtin_ia32_vpdpbusds256:
+  case X86::BI__builtin_ia32_vpdpbusds512:
+  case X86::BI__builtin_ia32_vpdpbusd128:
+  case X86::BI__builtin_ia32_vpdpbusd256:
+  case X86::BI__builtin_ia32_vpdpbusd512: { // VNNI dot-product builtins
+    unsigned BuiltinID = Call->getBuiltinCallee(); // shadows the parameter
+    bool IsDottingWord = false; // word (vpdpw*) forms; byte (vpdpb*) otherwise
+    bool IsSaturating = false; // *ds forms; defaults keep both flags defined
+    switch (BuiltinID) {
+    case X86::BI__builtin_ia32_vpdpwssd128:
+    case X86::BI__builtin_ia32_vpdpwssd256:
+    case X86::BI__builtin_ia32_vpdpwssd512:
+      IsDottingWord = true;
+      IsSaturating = false;
+      break;
+    case X86::BI__builtin_ia32_vpdpwssds128:
+    case X86::BI__builtin_ia32_vpdpwssds256:
+    case X86::BI__builtin_ia32_vpdpwssds512:
+      IsDottingWord = true;
+      IsSaturating = true;
+      break;
+    case X86::BI__builtin_ia32_vpdpbusds128:
+    case X86::BI__builtin_ia32_vpdpbusds256:
+    case X86::BI__builtin_ia32_vpdpbusds512:
+      IsDottingWord = false;
+      IsSaturating = true;
+      break;
+    case X86::BI__builtin_ia32_vpdpbusd128:
+    case X86::BI__builtin_ia32_vpdpbusd256:
+    case X86::BI__builtin_ia32_vpdpbusd512:
+      IsDottingWord = false;
+      IsSaturating = false;
+      break;
+    }
+    return interp__builtin_ia32_vpdp(S, OpPC, Call, IsDottingWord,
+                                     IsSaturating);
+  }
 
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 6ac16c2b831d2..0d1d5a9022a8f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14771,6 +14771,86 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
       return false;
     return Success(R, E);
   }
+  case X86::BI__builtin_ia32_vpdpwssd128:
+  case X86::BI__builtin_ia32_vpdpwssd256:
+  case X86::BI__builtin_ia32_vpdpwssd512:
+  case X86::BI__builtin_ia32_vpdpwssds128:
+  case X86::BI__builtin_ia32_vpdpwssds256:
+  case X86::BI__builtin_ia32_vpdpwssds512:
+  case X86::BI__builtin_ia32_vpdpbusds128:
+  case X86::BI__builtin_ia32_vpdpbusds256:
+  case X86::BI__builtin_ia32_vpdpbusds512:
+  case X86::BI__builtin_ia32_vpdpbusd128:
+  case X86::BI__builtin_ia32_vpdpbusd256:
+  case X86::BI__builtin_ia32_vpdpbusd512: { // VNNI dot-product builtins
+    unsigned BuiltinID = E->getBuiltinCallee();
+    bool IsDottingWord = false; // set for the 16-bit word (vpdpw*) forms
+    bool IsSaturating = false; // set for the saturating (*ds) forms
+    switch (BuiltinID) {
+    case X86::BI__builtin_ia32_vpdpwssd128:
+    case X86::BI__builtin_ia32_vpdpwssd256:
+    case X86::BI__builtin_ia32_vpdpwssd512:
+      IsDottingWord = true;
+      IsSaturating = false;
+      break;
+    case X86::BI__builtin_ia32_vpdpwssds128:
+    case X86::BI__builtin_ia32_vpdpwssds256:
+    case X86::BI__builtin_ia32_vpdpwssds512:
+      IsDottingWord = true;
+      IsSaturating = true;
+      break;
+    case X86::BI__builtin_ia32_vpdpbusds128:
+    case X86::BI__builtin_ia32_vpdpbusds256:
+    case X86::BI__builtin_ia32_vpdpbusds512:
+      IsDottingWord = false;
+      IsSaturating = true;
+      break;
+    case X86::BI__builtin_ia32_vpdpbusd128:
+    case X86::BI__builtin_ia32_vpdpbusd256:
+    case X86::BI__builtin_ia32_vpdpbusd512:
+      IsDottingWord = false;
+      IsSaturating = false;
+      break;
+    }
+
+    APValue Source, OperandA, OperandB;
+    if (!EvaluateAsRValue(Info, E->getArg(0), Source) ||
+        !EvaluateAsRValue(Info, E->getArg(1), OperandA) ||
+        !EvaluateAsRValue(Info, E->getArg(2), OperandB)) {
+      return false;
+    }
+
+    unsigned NumElements = Source.getVectorLength(); // one result per lane
+
+    SmallVector<APValue, 16> Result;
+    Result.reserve(NumElements);
+    unsigned Iters = IsDottingWord ? 2 : 4; // inputs folded into each lane
+    for (unsigned I = 0; I < NumElements; ++I) {
+      APSInt DotProduct = Source.getVectorElt(I).getInt();
+      DotProduct = DotProduct.sext(64); // widen before accumulating
+      for (unsigned J = 0; J < Iters; ++J) {
+        APSInt OpA;
+        if (IsDottingWord) { // words are sign-extended, bytes zero-extended
+          OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().sext(64),
+                       false);
+        } else {
+          OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().zext(64),
+                       false);
+        }
+        APSInt OpB = APSInt(
+            OperandB.getVectorElt(Iters * I + J).getInt().sext(64), false);
+        DotProduct += OpA * OpB;
+      }
+      if (IsSaturating) { // clamp to the signed 32-bit range
+        DotProduct = APSInt(DotProduct.truncSSat(32), false);
+      } else { // keep the low 32 bits
+        DotProduct = APSInt(DotProduct.trunc(32), false);
+      }
+      Result.push_back(APValue(DotProduct));
+    }
+
+    return Success(APValue(Result.data(), Result.size()), E);
+  }
   }
 }
 
diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h
index 4b8a199af32e5..053807032fcb3 100644
--- a/clang/lib/Headers/avx512vlvnniintrin.h
+++ b/clang/lib/Headers/avx512vlvnniintrin.h
@@ -24,6 +24,14 @@
                  __target__("avx512vl,avx512vnni"),                            \
                  __min_vector_width__(256)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
 /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
 /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
@@ -179,129 +187,115 @@
 #define _mm_dpwssds_epi32(S, A, B)                                             \
   ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
                                      (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                     (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
                                     (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
                                      (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                     (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
                                     (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                     (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
                                     (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
                                         (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
                                         (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
                                        (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
                                        (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
                                         (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
                                         (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
                                        (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
                                        (__v4si)_mm_setzero_si128());
diff --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h
index 2ce88efe4a04f..1aa431ed446b2 100644
--- a/clang/lib/Headers/avx512vnniintrin.h
+++ b/clang/lib/Headers/avx512vnniintrin.h
@@ -19,98 +19,99 @@
   __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"),     \
                  __min_vector_width__(512)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A,
                                              (__v64qi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
                                     (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+                          __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
                                     (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A,
                                               (__v64qi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A,
+                          __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
                                    (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+                           __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
                                    (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A,
                                              (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
                                     (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+                          __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
                                     (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A,
                                               (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A,
+                          __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
                                    (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+                           __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
                                    (__v16si)_mm512_setzero_si512());
 }
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif
diff --git a/clang/lib/Headers/avxvnniintrin.h b/clang/lib/Headers/avxvnniintrin.h
index 1d2e8c906effc..ee82676fcb392 100644
--- a/clang/lib/Headers/avxvnniintrin.h
+++ b/clang/lib/Headers/avxvnniintrin.h
@@ -43,6 +43,14 @@
 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256)))
 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
 /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed
 /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
@@ -60,9 +68,8 @@
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A,
                                              (__v32qi)__B);
 }
@@ -84,9 +91,8 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A,
                                               (__v32qi)__B);
 }
@@ -106,9 +112,8 @@ _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v16hi)__A,
                                              (__v16hi)__B);
 }
@@ -128,9 +133,8 @@ _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v16hi)__A,
                                               (__v16hi)__B);
 }
@@ -152,9 +156,8 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A,
                                              (__v16qi)__B);
 }
@@ -176,9 +179,8 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A,
                                               (__v16qi)__B);
 }
@@ -198,9 +200,8 @@ _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v8hi)__A,
                                              (__v8hi)__B);
 }
@@ -220,9 +221,8 @@ _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v8hi)__A,
                                               (__v8hi)__B);
 }
diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
index 11dbd717a9f77..8bdbdf1ca94a0 100644
--- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
@@ -3,7 +3,13 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
 
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpbusd_epi32
@@ -11,6 +17,13 @@ __m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_dpbusd_epi32(
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__mmask8)0x55,
+    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  104, 200, 304, 400, 504, 600, 704, 800));
 
 __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpbusd_epi32
@@ -18,12 +31,43 @@ __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_maskz_dpbusd_epi32(
+    (__mmask8)0x0F,
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 0, 0, 0, 0));
 
 __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusd_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_dpbusd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){10, 10, 10, 10, 10, 10, 10, 10}),
+    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  14, 14, 14, 14, 14, 14, 14, 14));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+    ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+  -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+    ((__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpbusds_epi32
@@ -31,6 +75,13 @@ __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_dpbusds_epi32(
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__mmask8)0xAA,
+    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  100, 204, 300, 404, 500, 604, 700, 804));
 
 __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpbusds_epi32
@@ -38,12 +89,37 @@ __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_maskz_dpbusds_epi32(
+    (__mmask8)0xFF,
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 4, 4, 4, 4));
 
 __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusds_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_dpbusds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusds_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusds_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusds_epi32(
+    ((__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+    ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+    ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpwssd_epi32
@@ -51,6 +127,13 @@ __m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_dpwssd_epi32(
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__mmask8)0xF0,
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  100, 200, 300, 400, 502, 602, 702, 802));
 
 __m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpwssd_epi32
@@ -58,12 +141,49 @@ __m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_maskz_dpwssd_epi32(
+    (__mmask8)0x0F,
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 0, 0, 0, 0));
 
 __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssd_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_dpwssd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){10, 10, 10, 10, 10, 10, 10, 10}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  12, 12, 12, 12, 12, 12, 12, 12));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+  2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpwssds_epi32
@@ -71,6 +191,13 @@ __m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_dpwssds_epi32(
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__mmask8)0xAA,
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  100, 202, 300, 402, 500, 602, 700, 802));
 
 __m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpwssds_epi32
@@ -78,12 +205,37 @@ __m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_maskz_dpwssds_epi32(
+    (__mmask8)0xFF,
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 2, 2, 2, 2));
 
 __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssds_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_dpwssds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssds_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssds_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpwssds_epi32(
+    ((__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+    ((__m256i)(__v16hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768}),
+    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpbusd_epi32
@@ -91,6 +243,13 @@ __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_dpbusd_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_mask_dpbusd_epi32(
+    (__m128i)(__v4si){100, 200, 300, 400},
+    (__mmask8)0x05,
+    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  104, 200, 304, 400));
 
 __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpbusd_epi32
@@ -98,12 +257,43 @@ __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_dpbusd_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_dpbusd_epi32(
+    (__mmask8)0x03,
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 0, 0));
 
 __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusd_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_dpbusd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpbusd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpbusd_epi32(
+    ((__m128i)(__v4si){10, 10, 10, 10}),
+    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  14, 14, 14, 14));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpbusd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+    ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+  -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpbusd_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+    ((__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpbusds_epi32
@@ -111,6 +301,13 @@ __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_dpbusds_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_mask_dpbusds_epi32(
+    (__m128i)(__v4si){100, 200, 300, 400},
+    (__mmask8)0x0A,
+    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  100, 204, 300, 404));
 
 __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpbusds_epi32
@@ -118,12 +315,37 @@ __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_dpbusds_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_dpbusds_epi32(
+    (__mmask8)0x0F,
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4));
 
 __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusds_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_dpbusds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpbusds_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpbusds_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpbusds_epi32(
+    ((__m128i)(__v4si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+    ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpwssd_epi32
@@ -131,6 +353,13 @@ __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_dpwssd_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_mask_dpwssd_epi32(
+    (__m128i)(__v4si){100, 200, 300, 400},
+    (__mmask8)0x05,
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+  102, 200, 302, 400));
 
 __m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpwssd_epi32
@@ -138,12 +367,49 @@ __m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_dpwssd_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_dpwssd_epi32(
+    (__mmask8)0x03,
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+  2, 2, 0, 0));
 
 __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssd_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_dpwssd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){10, 10, 10, 10}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+  12, 12, 12, 12));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v8hi){-1,-1,-1,-1,-1,-1,-1,-1}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+  -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+  2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
+    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpwssds_epi32
@@ -151,6 +417,13 @@ __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_dpwssds_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_mask_dpwssds_epi32(
+    (__m128i)(__v4si){100, 200, 300, 400},
+    (__mmask8)0x0A,
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+  100, 202, 300, 402));
 
 __m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpwssds_epi32
@@ -158,10 +431,35 @@ __m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_dpwssds_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_dpwssds_epi32(
+    (__mmask8)0x0F,
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2));
 
 __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssds_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_dpwssds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssds_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssds_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+  2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si(
+  _mm_dpwssds_epi32(
+    ((__m128i)(__v4si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+    ((__m128i)(__v8hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c
index 6b8465206eedb..f8f663b48aa36 100644
--- a/clang/test/CodeGen/X86/avx512vnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c
@@ -3,7 +3,13 @@
 //  RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s
 //  RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s
 
+//  RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+//  RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+//  RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+//  RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpbusd_epi32
@@ -11,6 +17,13 @@ __m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_dpbusd_epi32(
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__mmask16)0x5555,
+    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  104, 200, 304, 400, 504, 600, 704, 800, 904, 1000, 1104, 1200, 1304, 1400, 1504, 1600));
 
 __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpbusd_epi32
@@ -18,6 +31,13 @@ __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si(
+  _mm512_maskz_dpbusd_epi32(
+    (__mmask16)0x00FF,
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpbusd_epi32
@@ -25,12 +45,47 @@ __m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
   return _mm512_dpbusd_epi32(__S, __A, __B);
 }
 
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+  (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+  (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4));
+
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+  (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+  (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
+
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+  (__m512i)(__v64qu){1,2,3,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+  (__m512i)(__v64qi){5,6,7,8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  72, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
+
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+  (__m512i)(__v64qu){255,255,255,255, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1},
+  (__m512i)(__v64qi){-1,-1,-1,-1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1},
+  -1018, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5)); // lane 0: 2 + 4 * (255 * -1); A bytes are unsigned, B bytes are signed
+
+TEST_CONSTEXPR(match_v16si(
+  _mm512_dpbusd_epi32(
+    (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m512i)(__v64qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
+    (__m512i)(__v64qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1,
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1)); // non-saturating form: INT_MAX + 1 wraps to INT_MIN
+
 __m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpbusds_epi32
   // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_dpbusds_epi32(
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__mmask16)0x5555,
+    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  104, 200, 304, 400, 504, 600, 704, 800, 904, 1000, 1104, 1200, 1304, 1400, 1504, 1600));
 
 __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpbusds_epi32
@@ -38,12 +93,37 @@ __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // mask 0x00FF: lanes 0-7 expect 4 (four 1*1 products), lanes 8-15 expect 0
+  _mm512_maskz_dpbusds_epi32(
+    (__mmask16)0x00FF,
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpbusds_epi32
   // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
   return _mm512_dpbusds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm512_dpbusds_epi32(
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v16si( // INT_MAX accumulator plus a positive sum of 4: expected to clamp at INT_MAX
+  _mm512_dpbusds_epi32(
+    (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v16si( // INT_MIN accumulator plus a negative sum (255 * -1, four times): expected to clamp at INT_MIN
+  _mm512_dpbusds_epi32(
+    (__m512i)(__v16si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1},
+    (__m512i)(__v64qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
+    (__m512i)(__v64qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpwssd_epi32
@@ -51,6 +131,13 @@ __m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // mask 0xFF00: lanes 8-15 are expected to gain 2 (two 1*1 products), lanes 0-7 keep the first operand
+  _mm512_mask_dpwssd_epi32(
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__mmask16)0xFF00,
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  100, 200, 300, 400, 500, 600, 700, 800, 902, 1002, 1102, 1202, 1302, 1402, 1502, 1602));
 
 __m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpwssd_epi32
@@ -58,12 +145,49 @@ __m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // mask 0x000F: lanes 0-3 expect 2 (two 1*1 products), lanes 4-15 expect 0
+  _mm512_maskz_dpwssd_epi32(
+    (__mmask16)0x000F,
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpwssd_epi32
   // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
   return _mm512_dpwssd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm512_dpwssd_epi32(
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v16si( // non-zero accumulator 10 plus the same sum of 2: expects 12 per lane
+  _mm512_dpwssd_epi32(
+    (__m512i)(__v16si){10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12));
+TEST_CONSTEXPR(match_v16si( // negative words (-1 * 1, twice): expects -2 per lane
+  _mm512_dpwssd_epi32(
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v32hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v16si( // largest positive words: 2 * 32767 * 32767 = 2147352578, still below INT_MAX
+  _mm512_dpwssd_epi32(
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
+    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+  2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v16si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm512_dpwssd_epi32(
+    (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m512i)(__v32hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0},
+    (__m512i)(__v32hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpwssds_epi32
@@ -71,6 +195,13 @@ __m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, _
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // mask 0xAAAA: odd lanes are expected to gain 2 (two 1*1 products), even lanes keep the first operand
+  _mm512_mask_dpwssds_epi32(
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__mmask16)0xAAAA,
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  100, 202, 300, 402, 500, 602, 700, 802, 900, 1002, 1100, 1202, 1300, 1402, 1500, 1602));
 
 __m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpwssds_epi32
@@ -78,10 +209,35 @@ __m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // mask 0xFFFF selects every lane: expects 2 (two 1*1 products) in all 16 lanes
+  _mm512_maskz_dpwssds_epi32(
+    (__mmask16)0xFFFF,
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
 
 __m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpwssds_epi32
   // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
   return _mm512_dpwssds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v16si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm512_dpwssds_epi32(
+    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v16si( // INT_MAX accumulator plus 2 * 32767 * 32767: expected to clamp at INT_MAX
+  _mm512_dpwssds_epi32(
+    (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
+    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v16si( // INT_MIN accumulator plus 2 * (-32768 * 32767): expected to clamp at INT_MIN
+  _mm512_dpwssds_epi32(
+    (__m512i)(__v16si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1},
+    (__m512i)(__v32hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768},
+    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c
index 6557a26807eb2..7bf4d563f7ba2 100644
--- a/clang/test/CodeGen/X86/avxvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avxvnni-builtins.c
@@ -3,100 +3,346 @@
 // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s
 // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s
 
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusd_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_dpbusd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si( // unsigned 255 times signed -1, four products per lane: expects -1020
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+    ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+  -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+    ((__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusds_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_dpbusds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm256_dpbusds_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus a positive sum of 4: expected to clamp at INT_MAX
+  _mm256_dpbusds_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssd_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_dpwssd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si( // negative words (-1 * 1, twice): expects -2 per lane
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm256_dpwssd_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssds_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_dpwssds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm256_dpwssds_epi32(
+    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus 2 * 32767 * 32767: expected to clamp at INT_MAX
+  _mm256_dpwssds_epi32(
+    ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusd_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_dpbusd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm_dpbusd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si( // unsigned 255 times signed -1, four products per lane: expects -1020
+  _mm_dpbusd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+    ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+  -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm_dpbusd_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+    ((__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusds_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_dpbusds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm_dpbusds_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus a positive sum of 4: expected to clamp at INT_MAX
+  _mm_dpbusds_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+  2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssd_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_dpwssd_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si( // largest positive words: 2 * 32767 * 32767 = 2147352578, still below INT_MAX
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+  2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm_dpwssd_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
+    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0})),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssds_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_dpwssds_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm_dpwssds_epi32(
+    ((__m128i)(__v4si){0, 0, 0, 0}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+  2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus 2 * 32767 * 32767: expected to clamp at INT_MAX
+  _mm_dpwssds_epi32(
+    ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+  2147483647, 2147483647, 2147483647, 2147483647));
 
 __m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusd_avx_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_dpbusd_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm256_dpbusd_avx_epi32(
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si( // unsigned 255 times signed -1, four products per lane: expects -1020
+  _mm256_dpbusd_avx_epi32(
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
+    (__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+  -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm256_dpbusd_avx_epi32(
+    (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0},
+    (__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusds_avx_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_dpbusds_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm256_dpbusds_avx_epi32(
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus a positive sum of 4: expected to clamp at INT_MAX
+  _mm256_dpbusds_avx_epi32(
+    (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssd_avx_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_dpwssd_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm256_dpwssd_avx_epi32(
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si( // negative words (-1 * 1, twice): expects -2 per lane
+  _mm256_dpwssd_avx_epi32(
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm256_dpwssd_avx_epi32(
+    (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0},
+    (__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssds_avx_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_dpwssds_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm256_dpwssds_avx_epi32(
+    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si( // INT_MAX accumulator plus 2 * 32767 * 32767: expected to clamp at INT_MAX
+  _mm256_dpwssds_avx_epi32(
+    (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
+    (__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusd_avx_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_dpbusd_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm_dpbusd_avx_epi32(
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si( // unsigned 255 times signed -1, four products per lane: expects -1020
+  _mm_dpbusd_avx_epi32(
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
+    (__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+  -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm_dpbusd_avx_epi32(
+    (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+    (__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0},
+    (__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusds_avx_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_dpbusds_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones bytes: expects 4 per lane (four 1*1 products)
+  _mm_dpbusds_avx_epi32(
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus a positive sum of 4: expected to clamp at INT_MAX
+  _mm_dpbusds_avx_epi32(
+    (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+  2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssd_avx_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_dpwssd_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm_dpwssd_avx_epi32(
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si( // largest positive words: 2 * 32767 * 32767 = 2147352578, still below INT_MAX
+  _mm_dpwssd_avx_epi32(
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767},
+    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+  2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus one 1*1 product per lane: expected to wrap to INT_MIN
+  _mm_dpwssd_avx_epi32(
+    (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+    (__m128i)(__v8hi){1,0,1,0,1,0,1,0},
+    (__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssds_avx_epi32
   // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_dpwssds_avx_epi32(__S, __A, __B);
 }
+TEST_CONSTEXPR(match_v4si( // zero accumulator, all-ones words: expects 2 per lane (two 1*1 products)
+  _mm_dpwssds_avx_epi32(
+    (__m128i)(__v4si){0, 0, 0, 0},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+  2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si( // INT_MAX accumulator plus 2 * 32767 * 32767: expected to clamp at INT_MAX
+  _mm_dpwssds_avx_epi32(
+    (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767},
+    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+  2147483647, 2147483647, 2147483647, 2147483647));

>From b5c0395f50523ba08b387498759323fadde4cccc Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Mon, 6 Apr 2026 20:03:21 -0500
Subject: [PATCH 02/11] incorporate first round of feedback except about
 type-casting

---
 clang/include/clang/Basic/BuiltinsX86.td | 26 ++--------
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 +++-------
 clang/lib/Headers/avx512vlvnniintrin.h   | 55 ++++++++++----------
 clang/lib/Headers/avx512vnniintrin.h     | 66 +++++++++++-------------
 clang/lib/Headers/avxvnniintrin.h        | 35 +++++++------
 5 files changed, 91 insertions(+), 117 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 483092d6274d8..f0112a2db0f1d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1077,49 +1077,31 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
 
 let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
-}
-
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
-  def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
-}
-
-let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
-  def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
-}
-
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
 }
 
 let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
   def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
 }
 
 let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
   def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
 }
 
 let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
-}
-
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
-  def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
-}
-
-let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
-  def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
-}
-
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
 }
 
 let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
   def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
 }
 
 let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
   def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2b018ec590553..eaf37c26ec75e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4460,11 +4460,11 @@ static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
   const Pointer &SrcPtr = S.Stk.pop<Pointer>();
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  for (unsigned I = 0; I < NumElements; ++I) {
+  for (unsigned I = 0; I != NumElements; ++I) {
     APSInt Acc;
     INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem<T>(I).toAPSInt(); });
     Acc = Acc.sext(64);
-    for (unsigned J = 0; J < Iters; ++J) {
+    for (unsigned J = 0; J != Iters; ++J) {
       APSInt OpA, OpB;
       INT_TYPE_SWITCH_NO_BOOL(
           OpAElemT, { OpA = OpAPtr.elem<T>(Iters * I + J).toAPSInt(); });
@@ -6563,36 +6563,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_vpdpbusd256:
   case X86::BI__builtin_ia32_vpdpbusd512: {
     unsigned BuiltinID = Call->getBuiltinCallee();
-    bool IsDottingWord;
-    bool IsSaturating;
     switch (BuiltinID) {
     case X86::BI__builtin_ia32_vpdpwssd128:
     case X86::BI__builtin_ia32_vpdpwssd256:
     case X86::BI__builtin_ia32_vpdpwssd512:
-      IsDottingWord = true;
-      IsSaturating = false;
-      break;
+      return interp__builtin_ia32_vpdp(S, OpPC, Call, true, false);
     case X86::BI__builtin_ia32_vpdpwssds128:
     case X86::BI__builtin_ia32_vpdpwssds256:
     case X86::BI__builtin_ia32_vpdpwssds512:
-      IsDottingWord = true;
-      IsSaturating = true;
-      break;
+      return interp__builtin_ia32_vpdp(S, OpPC, Call, true, true);
+
     case X86::BI__builtin_ia32_vpdpbusds128:
     case X86::BI__builtin_ia32_vpdpbusds256:
     case X86::BI__builtin_ia32_vpdpbusds512:
-      IsDottingWord = false;
-      IsSaturating = true;
-      break;
+      return interp__builtin_ia32_vpdp(S, OpPC, Call, false, true);
+
     case X86::BI__builtin_ia32_vpdpbusd128:
     case X86::BI__builtin_ia32_vpdpbusd256:
     case X86::BI__builtin_ia32_vpdpbusd512:
-      IsDottingWord = false;
-      IsSaturating = false;
-      break;
+      return interp__builtin_ia32_vpdp(S, OpPC, Call, false, false);
     }
-    return interp__builtin_ia32_vpdp(S, OpPC, Call, IsDottingWord,
-                                     IsSaturating);
   }
 
   default:
diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h
index 053807032fcb3..8dfcab3f01606 100644
--- a/clang/lib/Headers/avx512vlvnniintrin.h
+++ b/clang/lib/Headers/avx512vlvnniintrin.h
@@ -15,6 +15,16 @@
 #define __AVX512VLVNNIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vl,avx512vnni"),                            \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vl,avx512vnni"),                            \
+                 __min_vector_width__(256))) constexpr
+#else
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
                  __target__("avx512vl,avx512vnni"),                            \
@@ -23,13 +33,6 @@
   __attribute__((__always_inline__, __nodebug__,                               \
                  __target__("avx512vl,avx512vnni"),                            \
                  __min_vector_width__(256)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#else
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
 #endif
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
@@ -187,114 +190,112 @@
 #define _mm_dpwssds_epi32(S, A, B)                                             \
   ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
                                      (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                     (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
                                     (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A,
-                           __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusds_epi32(
+    __mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
                                      (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                      (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                     (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
                                     (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A,
-                           __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssds_epi32(
+    __mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256(__U,
                                     (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
                                     (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
                                         (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
                                         (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
                                        (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
                                        (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
                                         (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                         (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
                                         (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
                                        (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128(__U,
                                        (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
diff --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h
index 1aa431ed446b2..1e245292ab869 100644
--- a/clang/lib/Headers/avx512vnniintrin.h
+++ b/clang/lib/Headers/avx512vnniintrin.h
@@ -15,103 +15,99 @@
 #define __AVX512VNNIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"),     \
-                 __min_vector_width__(512)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+                 __min_vector_width__(512))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"),     \
+                 __min_vector_width__(512)))
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusd_epi32(__m512i __S,
+                                                                 __m512i __A,
+                                                                 __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A,
                                              (__v64qi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
                                     (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A,
-                          __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusd_epi32(
+    __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
                                     (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusds_epi32(__m512i __S,
+                                                                  __m512i __A,
+                                                                  __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A,
                                               (__v64qi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A,
-                          __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusds_epi32(
+    __m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
                                    (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
-                           __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusds_epi32(
+    __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
                                    (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssd_epi32(__m512i __S,
+                                                                 __m512i __A,
+                                                                 __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A,
                                              (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
                                     (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A,
-                          __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssd_epi32(
+    __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                     (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
                                     (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssds_epi32(__m512i __S,
+                                                                  __m512i __A,
+                                                                  __m512i __B) {
   return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A,
                                               (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A,
-                          __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssds_epi32(
+    __m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
                                    (__v16si)__S);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
-                           __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssds_epi32(
+    __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                    (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
                                    (__v16si)_mm512_setzero_si512());
 }
 
 #undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
-
 #endif
diff --git a/clang/lib/Headers/avxvnniintrin.h b/clang/lib/Headers/avxvnniintrin.h
index ee82676fcb392..c92790b69ffbe 100644
--- a/clang/lib/Headers/avxvnniintrin.h
+++ b/clang/lib/Headers/avxvnniintrin.h
@@ -40,15 +40,20 @@
 
 /* Intrinsics with _avx_ prefix are for compatibility with msvc. */
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256)))
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128)))
-
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"),        \
+                 __min_vector_width__(256))) constexpr
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"),        \
+                 __min_vector_width__(128))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"),        \
+                 __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"),        \
+                 __min_vector_width__(128)))
 #endif
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
@@ -68,7 +73,7 @@
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A,
                                              (__v32qi)__B);
@@ -91,7 +96,7 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A,
                                               (__v32qi)__B);
@@ -112,7 +117,7 @@ _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v16hi)__A,
                                              (__v16hi)__B);
@@ -133,7 +138,7 @@ _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v16hi)__A,
                                               (__v16hi)__B);
@@ -156,7 +161,7 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A,
                                              (__v16qi)__B);
@@ -179,7 +184,7 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A,
                                               (__v16qi)__B);
@@ -200,7 +205,7 @@ _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v8hi)__A,
                                              (__v8hi)__B);
@@ -221,7 +226,7 @@ _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v8hi)__A,
                                               (__v8hi)__B);

>From 565e2f7d87ae08419b18627e864ff62d14e832f7 Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Mon, 6 Apr 2026 21:56:08 -0500
Subject: [PATCH 03/11] simplify interpreter to try addressing tbaederr's
 concerns

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 52 ++++++++++++------------
 clang/lib/AST/ExprConstant.cpp           | 42 +++++++------------
 2 files changed, 40 insertions(+), 54 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index eaf37c26ec75e..5876a494f27b1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4442,45 +4442,49 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
 }
 
 static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
-                                      const CallExpr *Call, bool IsDottingWord,
+                                      const CallExpr *Call,
                                       bool IsSaturating) {
   const auto *SrcVecT = Call->getArg(0)->getType()->castAs<VectorType>();
   const auto *OpAVecT = Call->getArg(1)->getType()->castAs<VectorType>();
   const auto *OpBVecT = Call->getArg(2)->getType()->castAs<VectorType>();
 
+  assert(OpAVecT->getNumElements() == OpBVecT->getNumElements());
+
+  unsigned NumSrcElts = SrcVecT->getNumElements();
+  unsigned NumOperandElts = OpAVecT->getNumElements();
+  unsigned EltsPerLane = NumOperandElts / NumSrcElts;
+
   PrimType SrcElemT = *S.getContext().classify(SrcVecT->getElementType());
   PrimType OpAElemT = *S.getContext().classify(OpAVecT->getElementType());
   PrimType OpBElemT = *S.getContext().classify(OpBVecT->getElementType());
 
-  unsigned NumElements = SrcVecT->getNumElements();
-  unsigned Iters = IsDottingWord ? 2 : 4;
 
   const Pointer &OpBPtr = S.Stk.pop<Pointer>();
   const Pointer &OpAPtr = S.Stk.pop<Pointer>();
   const Pointer &SrcPtr = S.Stk.pop<Pointer>();
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  for (unsigned I = 0; I != NumElements; ++I) {
+  for (unsigned I = 0; I != NumSrcElts; ++I) {
     APSInt Acc;
     INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem<T>(I).toAPSInt(); });
     Acc = Acc.sext(64);
-    for (unsigned J = 0; J != Iters; ++J) {
+    for (unsigned J = 0; J != EltsPerLane; ++J) {
       APSInt OpA, OpB;
-      INT_TYPE_SWITCH_NO_BOOL(
-          OpAElemT, { OpA = OpAPtr.elem<T>(Iters * I + J).toAPSInt(); });
-      INT_TYPE_SWITCH_NO_BOOL(
-          OpBElemT, { OpB = OpBPtr.elem<T>(Iters * I + J).toAPSInt(); });
-      if (IsDottingWord) {
-        OpA = APSInt(OpA.sext(64), false);
-      } else {
-        OpA = APSInt(OpA.zext(64), false);
-      }
-      OpB = APSInt(OpB.sext(64), false);
+      INT_TYPE_SWITCH_NO_BOOL(OpAElemT, {
+        OpA = OpAPtr.elem<T>(EltsPerLane * I + J).toAPSInt();
+        });
+      INT_TYPE_SWITCH_NO_BOOL(OpBElemT, {
+		OpB = OpBPtr.elem<T>(EltsPerLane * I + J).toAPSInt();
+		});
+	  OpA = APSInt(OpA.extend(64), false);
+      OpB = APSInt(OpB.extend(64), false);
       Acc += OpA * OpB;
     }
     if (IsSaturating) {
       Acc = APSInt(Acc.truncSSat(32), false);
-    }
+    } else {
+	  Acc = APSInt(Acc.trunc(32), false);
+	}
     INT_TYPE_SWITCH_NO_BOOL(SrcElemT,
                             { Dst.elem<T>(I) = static_cast<T>(Acc); });
   }
@@ -6567,21 +6571,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
     case X86::BI__builtin_ia32_vpdpwssd128:
     case X86::BI__builtin_ia32_vpdpwssd256:
     case X86::BI__builtin_ia32_vpdpwssd512:
-      return interp__builtin_ia32_vpdp(S, OpPC, Call, true, false);
+	case X86::BI__builtin_ia32_vpdpbusd128:
+    case X86::BI__builtin_ia32_vpdpbusd256:
+    case X86::BI__builtin_ia32_vpdpbusd512:
+      return interp__builtin_ia32_vpdp(S, OpPC, Call, false);
     case X86::BI__builtin_ia32_vpdpwssds128:
     case X86::BI__builtin_ia32_vpdpwssds256:
     case X86::BI__builtin_ia32_vpdpwssds512:
-      return interp__builtin_ia32_vpdp(S, OpPC, Call, true, true);
-
-    case X86::BI__builtin_ia32_vpdpbusds128:
+	case X86::BI__builtin_ia32_vpdpbusds128:
     case X86::BI__builtin_ia32_vpdpbusds256:
     case X86::BI__builtin_ia32_vpdpbusds512:
-      return interp__builtin_ia32_vpdp(S, OpPC, Call, false, true);
-
-    case X86::BI__builtin_ia32_vpdpbusd128:
-    case X86::BI__builtin_ia32_vpdpbusd256:
-    case X86::BI__builtin_ia32_vpdpbusd512:
-      return interp__builtin_ia32_vpdp(S, OpPC, Call, false, false);
+      return interp__builtin_ia32_vpdp(S, OpPC, Call, true);
     }
   }
 
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0d1d5a9022a8f..c10b723c06b3e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14784,35 +14784,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case X86::BI__builtin_ia32_vpdpbusd256:
   case X86::BI__builtin_ia32_vpdpbusd512: {
     unsigned BuiltinID = E->getBuiltinCallee();
-    bool IsDottingWord = false;
     bool IsSaturating = false;
     switch (BuiltinID) {
     case X86::BI__builtin_ia32_vpdpwssd128:
     case X86::BI__builtin_ia32_vpdpwssd256:
     case X86::BI__builtin_ia32_vpdpwssd512:
-      IsDottingWord = true;
+	case X86::BI__builtin_ia32_vpdpbusd128:
+    case X86::BI__builtin_ia32_vpdpbusd256:
+    case X86::BI__builtin_ia32_vpdpbusd512:
       IsSaturating = false;
       break;
     case X86::BI__builtin_ia32_vpdpwssds128:
     case X86::BI__builtin_ia32_vpdpwssds256:
     case X86::BI__builtin_ia32_vpdpwssds512:
-      IsDottingWord = true;
-      IsSaturating = true;
-      break;
     case X86::BI__builtin_ia32_vpdpbusds128:
     case X86::BI__builtin_ia32_vpdpbusds256:
     case X86::BI__builtin_ia32_vpdpbusds512:
-      IsDottingWord = false;
       IsSaturating = true;
       break;
-    case X86::BI__builtin_ia32_vpdpbusd128:
-    case X86::BI__builtin_ia32_vpdpbusd256:
-    case X86::BI__builtin_ia32_vpdpbusd512:
-      IsDottingWord = false;
-      IsSaturating = false;
-      break;
     }
-
     APValue Source, OperandA, OperandB;
     if (!EvaluateAsRValue(Info, E->getArg(0), Source) ||
         !EvaluateAsRValue(Info, E->getArg(1), OperandA) ||
@@ -14820,25 +14810,21 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
       return false;
     }
 
-    unsigned NumElements = Source.getVectorLength();
+    unsigned NumSrcElts = Source.getVectorLength();
+    unsigned NumOperandElts = OperandA.getVectorLength();
+	unsigned EltsPerLane = NumOperandElts / NumSrcElts;
+
+	assert(OperandA.getVectorLength() == OperandB.getVectorLength());
 
     SmallVector<APValue, 16> Result;
-    Result.reserve(NumElements);
-    unsigned Iters = IsDottingWord ? 2 : 4;
-    for (unsigned I = 0; I < NumElements; ++I) {
+    Result.reserve(NumSrcElts);
+    for (unsigned I = 0; I != NumSrcElts; ++I) {
       APSInt DotProduct = Source.getVectorElt(I).getInt();
-      DotProduct = DotProduct.sext(64);
-      for (unsigned J = 0; J < Iters; ++J) {
-        APSInt OpA;
-        if (IsDottingWord) {
-          OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().sext(64),
-                       false);
-        } else {
-          OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().zext(64),
-                       false);
-        }
+      DotProduct = DotProduct.extend(64);
+      for (unsigned J = 0; J != EltsPerLane; ++J) {
+        APSInt OpA = APSInt(OperandA.getVectorElt(EltsPerLane * I + J).getInt().extend(64), false);
         APSInt OpB = APSInt(
-            OperandB.getVectorElt(Iters * I + J).getInt().sext(64), false);
+            OperandB.getVectorElt(EltsPerLane * I + J).getInt().extend(64), false);
         DotProduct += OpA * OpB;
       }
       if (IsSaturating) {

>From 533b4d86e4907bc15b75813e642fdcb73b9f12ef Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Mon, 6 Apr 2026 22:03:40 -0500
Subject: [PATCH 04/11] delete stray line

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5876a494f27b1..22c37b67176e4 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4458,7 +4458,6 @@ static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
   PrimType OpAElemT = *S.getContext().classify(OpAVecT->getElementType());
   PrimType OpBElemT = *S.getContext().classify(OpBVecT->getElementType());
 
-
   const Pointer &OpBPtr = S.Stk.pop<Pointer>();
   const Pointer &OpAPtr = S.Stk.pop<Pointer>();
   const Pointer &SrcPtr = S.Stk.pop<Pointer>();

>From e26e1959cb70b4d337c81e7adb0f6e9e77ca960d Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Tue, 7 Apr 2026 11:11:49 -0500
Subject: [PATCH 05/11] we check and reject the case where src and dst are
 different types

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 35 ++++++++++++------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 22c37b67176e4..bea20ec01b968 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4447,44 +4447,45 @@ static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
   const auto *SrcVecT = Call->getArg(0)->getType()->castAs<VectorType>();
   const auto *OpAVecT = Call->getArg(1)->getType()->castAs<VectorType>();
   const auto *OpBVecT = Call->getArg(2)->getType()->castAs<VectorType>();
+  const auto *DstVecT = Call->getType()->castAs<VectorType>();
 
   assert(OpAVecT->getNumElements() == OpBVecT->getNumElements());
 
-  unsigned NumSrcElts = SrcVecT->getNumElements();
-  unsigned NumOperandElts = OpAVecT->getNumElements();
-  unsigned EltsPerLane = NumOperandElts / NumSrcElts;
+  unsigned NumSrcElems = SrcVecT->getNumElements();
+  unsigned NumOperandElems = OpAVecT->getNumElements();
+  unsigned ElemsPerLane = NumOperandElems / NumSrcElems;
 
   PrimType SrcElemT = *S.getContext().classify(SrcVecT->getElementType());
   PrimType OpAElemT = *S.getContext().classify(OpAVecT->getElementType());
   PrimType OpBElemT = *S.getContext().classify(OpBVecT->getElementType());
+  PrimType DstElemT = *S.getContext().classify(DstVecT->getElementType());
+
+  assert(SrcElemT == DstElemT);
 
   const Pointer &OpBPtr = S.Stk.pop<Pointer>();
   const Pointer &OpAPtr = S.Stk.pop<Pointer>();
   const Pointer &SrcPtr = S.Stk.pop<Pointer>();
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  for (unsigned I = 0; I != NumSrcElts; ++I) {
+  for (unsigned I = 0; I != NumSrcElems; ++I) {
     APSInt Acc;
     INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem<T>(I).toAPSInt(); });
     Acc = Acc.sext(64);
-    for (unsigned J = 0; J != EltsPerLane; ++J) {
+    for (unsigned J = 0; J != ElemsPerLane; ++J) {
       APSInt OpA, OpB;
-      INT_TYPE_SWITCH_NO_BOOL(OpAElemT, {
-        OpA = OpAPtr.elem<T>(EltsPerLane * I + J).toAPSInt();
-        });
-      INT_TYPE_SWITCH_NO_BOOL(OpBElemT, {
-		OpB = OpBPtr.elem<T>(EltsPerLane * I + J).toAPSInt();
-		});
-	  OpA = APSInt(OpA.extend(64), false);
+      INT_TYPE_SWITCH_NO_BOOL(
+          OpAElemT, { OpA = OpAPtr.elem<T>(ElemsPerLane * I + J).toAPSInt(); });
+      INT_TYPE_SWITCH_NO_BOOL(
+          OpBElemT, { OpB = OpBPtr.elem<T>(ElemsPerLane * I + J).toAPSInt(); });
+      OpA = APSInt(OpA.extend(64), false);
       OpB = APSInt(OpB.extend(64), false);
       Acc += OpA * OpB;
     }
-    if (IsSaturating) {
+    if (IsSaturating)
       Acc = APSInt(Acc.truncSSat(32), false);
-    } else {
-	  Acc = APSInt(Acc.trunc(32), false);
-	}
-    INT_TYPE_SWITCH_NO_BOOL(SrcElemT,
+    else
+      Acc = APSInt(Acc.trunc(32), false);
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT,
                             { Dst.elem<T>(I) = static_cast<T>(Acc); });
   }
   Dst.initializeAllElements();

>From 6237bfeea12cd27e218ba0e38162c4b161396f3c Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Tue, 7 Apr 2026 12:25:43 -0500
Subject: [PATCH 06/11] remove nested switch in interpbuiltin.cpp

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 +++++-------------------
 clang/lib/AST/ExprConstant.cpp           | 13 ++++++----
 2 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index bea20ec01b968..fceb7dc4cc2f4 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4442,8 +4442,7 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
 }
 
 static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
-                                      const CallExpr *Call,
-                                      bool IsSaturating) {
+                                      const CallExpr *Call, bool IsSaturating) {
   const auto *SrcVecT = Call->getArg(0)->getType()->castAs<VectorType>();
   const auto *OpAVecT = Call->getArg(1)->getType()->castAs<VectorType>();
   const auto *OpBVecT = Call->getArg(2)->getType()->castAs<VectorType>();
@@ -6557,34 +6556,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_vpdpwssd128:
   case X86::BI__builtin_ia32_vpdpwssd256:
   case X86::BI__builtin_ia32_vpdpwssd512:
+  case X86::BI__builtin_ia32_vpdpbusd128:
+  case X86::BI__builtin_ia32_vpdpbusd256:
+  case X86::BI__builtin_ia32_vpdpbusd512:
+    return interp__builtin_ia32_vpdp(S, OpPC, Call, false);
   case X86::BI__builtin_ia32_vpdpwssds128:
   case X86::BI__builtin_ia32_vpdpwssds256:
   case X86::BI__builtin_ia32_vpdpwssds512:
   case X86::BI__builtin_ia32_vpdpbusds128:
   case X86::BI__builtin_ia32_vpdpbusds256:
   case X86::BI__builtin_ia32_vpdpbusds512:
-  case X86::BI__builtin_ia32_vpdpbusd128:
-  case X86::BI__builtin_ia32_vpdpbusd256:
-  case X86::BI__builtin_ia32_vpdpbusd512: {
-    unsigned BuiltinID = Call->getBuiltinCallee();
-    switch (BuiltinID) {
-    case X86::BI__builtin_ia32_vpdpwssd128:
-    case X86::BI__builtin_ia32_vpdpwssd256:
-    case X86::BI__builtin_ia32_vpdpwssd512:
-	case X86::BI__builtin_ia32_vpdpbusd128:
-    case X86::BI__builtin_ia32_vpdpbusd256:
-    case X86::BI__builtin_ia32_vpdpbusd512:
-      return interp__builtin_ia32_vpdp(S, OpPC, Call, false);
-    case X86::BI__builtin_ia32_vpdpwssds128:
-    case X86::BI__builtin_ia32_vpdpwssds256:
-    case X86::BI__builtin_ia32_vpdpwssds512:
-	case X86::BI__builtin_ia32_vpdpbusds128:
-    case X86::BI__builtin_ia32_vpdpbusds256:
-    case X86::BI__builtin_ia32_vpdpbusds512:
-      return interp__builtin_ia32_vpdp(S, OpPC, Call, true);
-    }
-  }
-
+    return interp__builtin_ia32_vpdp(S, OpPC, Call, true);
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index c10b723c06b3e..d5a1b1d2dc43a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14789,7 +14789,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     case X86::BI__builtin_ia32_vpdpwssd128:
     case X86::BI__builtin_ia32_vpdpwssd256:
     case X86::BI__builtin_ia32_vpdpwssd512:
-	case X86::BI__builtin_ia32_vpdpbusd128:
+    case X86::BI__builtin_ia32_vpdpbusd128:
     case X86::BI__builtin_ia32_vpdpbusd256:
     case X86::BI__builtin_ia32_vpdpbusd512:
       IsSaturating = false;
@@ -14812,9 +14812,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     unsigned NumSrcElts = Source.getVectorLength();
     unsigned NumOperandElts = OperandA.getVectorLength();
-	unsigned EltsPerLane = NumOperandElts / NumSrcElts;
+    unsigned EltsPerLane = NumOperandElts / NumSrcElts;
 
-	assert(OperandA.getVectorLength() == OperandB.getVectorLength());
+    assert(OperandA.getVectorLength() == OperandB.getVectorLength());
 
     SmallVector<APValue, 16> Result;
     Result.reserve(NumSrcElts);
@@ -14822,9 +14822,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
       APSInt DotProduct = Source.getVectorElt(I).getInt();
       DotProduct = DotProduct.extend(64);
       for (unsigned J = 0; J != EltsPerLane; ++J) {
-        APSInt OpA = APSInt(OperandA.getVectorElt(EltsPerLane * I + J).getInt().extend(64), false);
+        APSInt OpA = APSInt(
+            OperandA.getVectorElt(EltsPerLane * I + J).getInt().extend(64),
+            false);
         APSInt OpB = APSInt(
-            OperandB.getVectorElt(EltsPerLane * I + J).getInt().extend(64), false);
+            OperandB.getVectorElt(EltsPerLane * I + J).getInt().extend(64),
+            false);
         DotProduct += OpA * OpB;
       }
       if (IsSaturating) {

>From 93974c786d25702d1983f1f417ef32dad4f82751 Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Tue, 7 Apr 2026 13:30:28 -0500
Subject: [PATCH 07/11] remove nested switch in exprconstant.cpp

---
 clang/lib/AST/ExprConstant.cpp | 104 ++++++++++++++-------------------
 1 file changed, 44 insertions(+), 60 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d5a1b1d2dc43a..a911c775f0c74 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12407,6 +12407,45 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(V, E);
   };
 
+  auto EvalVectorDotProduct = [&](bool IsSaturating) -> bool {
+    APValue Source, OperandA, OperandB;
+    if (!EvaluateAsRValue(Info, E->getArg(0), Source) ||
+        !EvaluateAsRValue(Info, E->getArg(1), OperandA) ||
+        !EvaluateAsRValue(Info, E->getArg(2), OperandB)) {
+      return false;
+    }
+
+    unsigned NumSrcElems = Source.getVectorLength();
+    unsigned NumOperandElems = OperandA.getVectorLength();
+    unsigned ElemsPerLane = NumOperandElems / NumSrcElems;
+
+    assert(OperandA.getVectorLength() == OperandB.getVectorLength());
+
+    SmallVector<APValue, 16> Result;
+    Result.reserve(NumSrcElems);
+    for (unsigned I = 0; I != NumSrcElems; ++I) {
+      APSInt DotProduct = Source.getVectorElt(I).getInt();
+      DotProduct = DotProduct.extend(64);
+      for (unsigned J = 0; J != ElemsPerLane; ++J) {
+        APSInt OpA = APSInt(
+            OperandA.getVectorElt(ElemsPerLane * I + J).getInt().extend(64),
+            false);
+        APSInt OpB = APSInt(
+            OperandB.getVectorElt(ElemsPerLane * I + J).getInt().extend(64),
+            false);
+        DotProduct += OpA * OpB;
+      }
+      if (IsSaturating) {
+        DotProduct = APSInt(DotProduct.truncSSat(32), false);
+      } else {
+        DotProduct = APSInt(DotProduct.trunc(32), false);
+      }
+      Result.push_back(APValue(DotProduct));
+    }
+
+    return Success(APValue(Result.data(), Result.size()), E);
+  };
+
   switch (E->getBuiltinCallee()) {
   default:
     return false;
@@ -14774,72 +14813,17 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
   case X86::BI__builtin_ia32_vpdpwssd128:
   case X86::BI__builtin_ia32_vpdpwssd256:
   case X86::BI__builtin_ia32_vpdpwssd512:
+  case X86::BI__builtin_ia32_vpdpbusd128:
+  case X86::BI__builtin_ia32_vpdpbusd256:
+  case X86::BI__builtin_ia32_vpdpbusd512:
+    return EvalVectorDotProduct(false);
   case X86::BI__builtin_ia32_vpdpwssds128:
   case X86::BI__builtin_ia32_vpdpwssds256:
   case X86::BI__builtin_ia32_vpdpwssds512:
   case X86::BI__builtin_ia32_vpdpbusds128:
   case X86::BI__builtin_ia32_vpdpbusds256:
   case X86::BI__builtin_ia32_vpdpbusds512:
-  case X86::BI__builtin_ia32_vpdpbusd128:
-  case X86::BI__builtin_ia32_vpdpbusd256:
-  case X86::BI__builtin_ia32_vpdpbusd512: {
-    unsigned BuiltinID = E->getBuiltinCallee();
-    bool IsSaturating = false;
-    switch (BuiltinID) {
-    case X86::BI__builtin_ia32_vpdpwssd128:
-    case X86::BI__builtin_ia32_vpdpwssd256:
-    case X86::BI__builtin_ia32_vpdpwssd512:
-    case X86::BI__builtin_ia32_vpdpbusd128:
-    case X86::BI__builtin_ia32_vpdpbusd256:
-    case X86::BI__builtin_ia32_vpdpbusd512:
-      IsSaturating = false;
-      break;
-    case X86::BI__builtin_ia32_vpdpwssds128:
-    case X86::BI__builtin_ia32_vpdpwssds256:
-    case X86::BI__builtin_ia32_vpdpwssds512:
-    case X86::BI__builtin_ia32_vpdpbusds128:
-    case X86::BI__builtin_ia32_vpdpbusds256:
-    case X86::BI__builtin_ia32_vpdpbusds512:
-      IsSaturating = true;
-      break;
-    }
-    APValue Source, OperandA, OperandB;
-    if (!EvaluateAsRValue(Info, E->getArg(0), Source) ||
-        !EvaluateAsRValue(Info, E->getArg(1), OperandA) ||
-        !EvaluateAsRValue(Info, E->getArg(2), OperandB)) {
-      return false;
-    }
-
-    unsigned NumSrcElts = Source.getVectorLength();
-    unsigned NumOperandElts = OperandA.getVectorLength();
-    unsigned EltsPerLane = NumOperandElts / NumSrcElts;
-
-    assert(OperandA.getVectorLength() == OperandB.getVectorLength());
-
-    SmallVector<APValue, 16> Result;
-    Result.reserve(NumSrcElts);
-    for (unsigned I = 0; I != NumSrcElts; ++I) {
-      APSInt DotProduct = Source.getVectorElt(I).getInt();
-      DotProduct = DotProduct.extend(64);
-      for (unsigned J = 0; J != EltsPerLane; ++J) {
-        APSInt OpA = APSInt(
-            OperandA.getVectorElt(EltsPerLane * I + J).getInt().extend(64),
-            false);
-        APSInt OpB = APSInt(
-            OperandB.getVectorElt(EltsPerLane * I + J).getInt().extend(64),
-            false);
-        DotProduct += OpA * OpB;
-      }
-      if (IsSaturating) {
-        DotProduct = APSInt(DotProduct.truncSSat(32), false);
-      } else {
-        DotProduct = APSInt(DotProduct.trunc(32), false);
-      }
-      Result.push_back(APValue(DotProduct));
-    }
-
-    return Success(APValue(Result.data(), Result.size()), E);
-  }
+    return EvalVectorDotProduct(true);
   }
 }
 

>From 8d94075965e3281ecfc0c5e1630fcebd66de8f4c Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Wed, 8 Apr 2026 19:19:02 -0500
Subject: [PATCH 08/11] add checks before casting in interpbuiltin.cpp

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index fceb7dc4cc2f4..851f3c359f136 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4443,10 +4443,20 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
 
 static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
                                       const CallExpr *Call, bool IsSaturating) {
-  const auto *SrcVecT = Call->getArg(0)->getType()->castAs<VectorType>();
-  const auto *OpAVecT = Call->getArg(1)->getType()->castAs<VectorType>();
-  const auto *OpBVecT = Call->getArg(2)->getType()->castAs<VectorType>();
-  const auto *DstVecT = Call->getType()->castAs<VectorType>();
+  assert(Call->getNumArgs() == 3);
+
+  QualType SrcT = Call->getArg(0)->getType();
+  QualType OpAT = Call->getArg(1)->getType();
+  QualType OpBT = Call->getArg(2)->getType();
+  QualType DstT = Call->getType();
+  if (!SrcT->isVectorType() || !OpAT->isVectorType() || !OpBT->isVectorType() ||
+      !DstT->isVectorType())
+    return false;
+
+  const auto *SrcVecT = SrcT->castAs<VectorType>();
+  const auto *OpAVecT = OpAT->castAs<VectorType>();
+  const auto *OpBVecT = OpBT->castAs<VectorType>();
+  const auto *DstVecT = DstT->castAs<VectorType>();
 
   assert(OpAVecT->getNumElements() == OpBVecT->getNumElements());
 

>From 19da6e391ddad2c473a10518808503f87a34d6e3 Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Wed, 27 May 2026 18:55:37 -0500
Subject: [PATCH 09/11] Quick update to tests

---
 .../test/CodeGen/X86/avx512vlvnni-builtins.c  | 301 +++++++-----------
 clang/test/CodeGen/X86/avx512vnni-builtins.c  | 153 +++------
 clang/test/CodeGen/X86/avxvnni-builtins.c     | 240 ++++++--------
 3 files changed, 257 insertions(+), 437 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
index 8bdbdf1ca94a0..87568156486de 100644
--- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
@@ -21,9 +21,9 @@ TEST_CONSTEXPR(match_v8si(
   _mm256_mask_dpbusd_epi32(
     (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
     (__mmask8)0x55,
-    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  104, 200, 304, 400, 504, 600, 704, 800));
+    (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
+    (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}),
+  110, 200, 342, 400, 574, 600, 806, 800));
 
 __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpbusd_epi32
@@ -34,10 +34,10 @@ __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __
 TEST_CONSTEXPR(match_v8si(
   _mm256_maskz_dpbusd_epi32(
     (__mmask8)0x0F,
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 0, 0, 0, 0));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
+    (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}),
+  110, 226, 342, 458, 0, 0, 0, 0));
 
 __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusd_epi32
@@ -46,27 +46,15 @@ __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4, 4, 4, 4, 4));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpbusd_epi32(
-    ((__m256i)(__v8si){10, 10, 10, 10, 10, 10, 10, 10}),
-    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  14, 14, 14, 14, 14, 14, 14, 14));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpbusd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
-    ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
-  -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
+    ((__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32})),
+  110, 226, 342, 458, 574, 690, 806, 922));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusd_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
-    ((__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+    ((__m256i)(__v32qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
+    ((__m256i)(__v32qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
@@ -79,9 +67,9 @@ TEST_CONSTEXPR(match_v8si(
   _mm256_mask_dpbusds_epi32(
     (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
     (__mmask8)0xAA,
-    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  100, 204, 300, 404, 500, 604, 700, 804));
+    (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
+    (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}),
+  100, 226, 300, 458, 500, 690, 700, 922));
 
 __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpbusds_epi32
@@ -92,10 +80,10 @@ __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _
 TEST_CONSTEXPR(match_v8si(
   _mm256_maskz_dpbusds_epi32(
     (__mmask8)0xFF,
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 4, 4, 4, 4));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
+    (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}),
+  110, 226, 342, 458, 574, 690, 806, 922));
 
 __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusds_epi32
@@ -104,22 +92,16 @@ __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusds_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4, 4, 4, 4, 4));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
+    ((__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32})),
+  110, 226, 342, 458, 574, 690, 806, 922));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusds_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+    ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
+    ((__m256i)(__v32qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpbusds_epi32(
-    ((__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
-    ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
-    ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
-  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpwssd_epi32
@@ -131,9 +113,9 @@ TEST_CONSTEXPR(match_v8si(
   _mm256_mask_dpwssd_epi32(
     (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
     (__mmask8)0xF0,
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  100, 200, 300, 400, 502, 602, 702, 802));
+    (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16},
+    (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}),
+  100, 200, 300, 400, 519, 623, 727, 831));
 
 __m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpwssd_epi32
@@ -144,10 +126,10 @@ __m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __
 TEST_CONSTEXPR(match_v8si(
   _mm256_maskz_dpwssd_epi32(
     (__mmask8)0x0F,
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 0, 0, 0, 0));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16},
+    (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}),
+  103, 207, 311, 415, 0, 0, 0, 0));
 
 __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssd_epi32
@@ -156,33 +138,15 @@ __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2, 2, 2, 2, 2));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpwssd_epi32(
-    ((__m256i)(__v8si){10, 10, 10, 10, 10, 10, 10, 10}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  12, 12, 12, 12, 12, 12, 12, 12));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpwssd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  -2, -2, -2, -2, -2, -2, -2, -2));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpwssd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
-    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
-  2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}),
+    ((__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16})),
+  103, 207, 311, 415, 519, 623, 727, 831));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssd_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
-    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0})),
+    ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
+    ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
@@ -195,9 +159,9 @@ TEST_CONSTEXPR(match_v8si(
   _mm256_mask_dpwssds_epi32(
     (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
     (__mmask8)0xAA,
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  100, 202, 300, 402, 500, 602, 700, 802));
+    (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16},
+    (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}),
+  100, 207, 300, 415, 500, 623, 700, 831));
 
 __m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_dpwssds_epi32
@@ -208,10 +172,10 @@ __m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _
 TEST_CONSTEXPR(match_v8si(
   _mm256_maskz_dpwssds_epi32(
     (__mmask8)0xFF,
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 2, 2, 2, 2));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16},
+    (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}),
+  103, 207, 311, 415, 519, 623, 727, 831));
 
 __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssds_epi32
@@ -220,22 +184,16 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssds_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2, 2, 2, 2, 2));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}),
+    ((__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16})),
+  103, 207, 311, 415, 519, 623, 727, 831));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssds_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
-    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpwssds_epi32(
-    ((__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
-    ((__m256i)(__v16hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768}),
-    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
-  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpbusd_epi32
@@ -245,11 +203,11 @@ __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_mask_dpbusd_epi32(
-    (__m128i)(__v4si){100, 200, 300, 400},
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
     (__mmask8)0x05,
-    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  104, 200, 304, 400));
+    (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
+    (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}),
+  1010, 2000, 3042, 4000));
 
 __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpbusd_epi32
@@ -260,10 +218,10 @@ __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_dpbusd_epi32(
     (__mmask8)0x03,
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 0, 0));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
+    (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}),
+  1010, 2026, 0, 0));
 
 __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusd_epi32
@@ -272,27 +230,15 @@ __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpbusd_epi32(
-    ((__m128i)(__v4si){10, 10, 10, 10}),
-    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  14, 14, 14, 14));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpbusd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
-    ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
-  -1020, -1020, -1020, -1020));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
+    ((__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16})),
+  1010, 2026, 3042, 4058));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusd_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
-    ((__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+    ((__m128i)(__v16qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
+    ((__m128i)(__v16qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
@@ -303,11 +249,11 @@ __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_mask_dpbusds_epi32(
-    (__m128i)(__v4si){100, 200, 300, 400},
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
     (__mmask8)0x0A,
-    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  100, 204, 300, 404));
+    (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
+    (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}),
+  1000, 2026, 3000, 4058));
 
 __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpbusds_epi32
@@ -318,10 +264,10 @@ __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_dpbusds_epi32(
     (__mmask8)0x0F,
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
+    (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}),
+  1010, 2026, 3042, 4058));
 
 __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusds_epi32
@@ -330,22 +276,16 @@ __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusds_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
+    ((__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16})),
+  1010, 2026, 3042, 4058));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusds_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+    ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
+    ((__m128i)(__v16qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpbusds_epi32(
-    ((__m128i)(__v4si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
-    ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
-    ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
-  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpwssd_epi32
@@ -355,11 +295,11 @@ __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_mask_dpwssd_epi32(
-    (__m128i)(__v4si){100, 200, 300, 400},
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
     (__mmask8)0x05,
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-  102, 200, 302, 400));
+    (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8},
+    (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}),
+  1003, 2000, 3011, 4000));
 
 __m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpwssd_epi32
@@ -370,10 +310,10 @@ __m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_dpwssd_epi32(
     (__mmask8)0x03,
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-  2, 2, 0, 0));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8},
+    (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}),
+  1003, 2007, 0, 0));
 
 __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssd_epi32
@@ -382,33 +322,15 @@ __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpwssd_epi32(
-    ((__m128i)(__v4si){10, 10, 10, 10}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
-  12, 12, 12, 12));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpwssd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v8hi){-1,-1,-1,-1,-1,-1,-1,-1}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
-  -2, -2, -2, -2));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpwssd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
-  2147352578, 2147352578, 2147352578, 2147352578));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}),
+    ((__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8})),
+  1003, 2007, 3011, 4015));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssd_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
-    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0})),
+    ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0}),
+    ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
@@ -419,11 +341,11 @@ __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_mask_dpwssds_epi32(
-    (__m128i)(__v4si){100, 200, 300, 400},
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
     (__mmask8)0x0A,
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-  100, 202, 300, 402));
+    (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8},
+    (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}),
+  1000, 2007, 3000, 4015));
 
 __m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_dpwssds_epi32
@@ -434,10 +356,10 @@ __m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_dpwssds_epi32(
     (__mmask8)0x0F,
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8},
+    (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}),
+  1003, 2007, 3011, 4015));
 
 __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssds_epi32
@@ -446,20 +368,13 @@ __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssds_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}),
+    ((__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8})),
+  1003, 2007, 3011, 4015));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssds_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpwssds_epi32(
-    ((__m128i)(__v4si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
-    ((__m128i)(__v8hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
-  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
-
diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c
index f8f663b48aa36..8a38cf28e360c 100644
--- a/clang/test/CodeGen/X86/avx512vnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c
@@ -21,9 +21,9 @@ TEST_CONSTEXPR(match_v16si(
   _mm512_mask_dpbusd_epi32(
     (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
     (__mmask16)0x5555,
-    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  104, 200, 304, 400, 504, 600, 704, 800, 904, 1000, 1104, 1200, 1304, 1400, 1504, 1600));
+    (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
+    (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}),
+  110, 200, 342, 400, 574, 600, 806, 800, 1038, 1000, 1270, 1200, 1502, 1400, 1734, 1600));
 
 __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpbusd_epi32
@@ -34,44 +34,28 @@ __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_dpbusd_epi32(
     (__mmask16)0x00FF,
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0));
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
+    (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}),
+  110, 226, 342, 458, 574, 690, 806, 922, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpbusd_epi32
   // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
   return _mm512_dpbusd_epi32(__S, __A, __B);
 }
-
-TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-  (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-  (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4));
-
-TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-  (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
-
-TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  (__m512i)(__v64qu){1,2,3,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-  (__m512i)(__v64qi){5,6,7,8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  72, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
-
-TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  (__m512i)(__v64qu){1,2,3,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-  (__m512i)(__v64qi){5,6,7,8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  72, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
-
+TEST_CONSTEXPR(match_v16si(
+  _mm512_dpbusd_epi32(
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
+    (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}),
+  110, 226, 342, 458, 574, 690, 806, 922, 1038, 1154, 1270, 1386, 1502, 1618, 1734, 1850));
 TEST_CONSTEXPR(match_v16si(
   _mm512_dpbusd_epi32(
     (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
     (__m512i)(__v64qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
     (__m512i)(__v64qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
-  -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648, 
-   -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648,  -2147483648));
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpbusds_epi32
@@ -83,9 +67,9 @@ TEST_CONSTEXPR(match_v16si(
   _mm512_mask_dpbusds_epi32(
     (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
     (__mmask16)0x5555,
-    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  104, 200, 304, 400, 504, 600, 704, 800, 904, 1000, 1104, 1200, 1304, 1400, 1504, 1600));
+    (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
+    (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}),
+  110, 200, 342, 400, 574, 600, 806, 800, 1038, 1000, 1270, 1200, 1502, 1400, 1734, 1600));
 
 __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpbusds_epi32
@@ -96,10 +80,10 @@ __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_dpbusds_epi32(
     (__mmask16)0x00FF,
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0));
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
+    (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}),
+  110, 226, 342, 458, 574, 690, 806, 922, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpbusds_epi32
@@ -108,22 +92,16 @@ __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
 }
 TEST_CONSTEXPR(match_v16si(
   _mm512_dpbusds_epi32(
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4));
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
+    (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}),
+  110, 226, 342, 458, 574, 690, 806, 922, 1038, 1154, 1270, 1386, 1502, 1618, 1734, 1850));
 TEST_CONSTEXPR(match_v16si(
   _mm512_dpbusds_epi32(
     (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
-    (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
+    (__m512i)(__v64qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
-TEST_CONSTEXPR(match_v16si(
-  _mm512_dpbusds_epi32(
-    (__m512i)(__v16si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1},
-    (__m512i)(__v64qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
-    (__m512i)(__v64qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
-  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpwssd_epi32
@@ -135,9 +113,9 @@ TEST_CONSTEXPR(match_v16si(
   _mm512_mask_dpwssd_epi32(
     (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
     (__mmask16)0xFF00,
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  100, 200, 300, 400, 500, 600, 700, 800, 902, 1002, 1102, 1202, 1302, 1402, 1502, 1602));
+    (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32},
+    (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}),
+  100, 200, 300, 400, 500, 600, 700, 800, 935, 1039, 1143, 1247, 1351, 1455, 1559, 1663));
 
 __m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpwssd_epi32
@@ -148,10 +126,10 @@ __m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_dpwssd_epi32(
     (__mmask16)0x000F,
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32},
+    (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}),
+  103, 207, 311, 415, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpwssd_epi32
@@ -160,33 +138,15 @@ __m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
 }
 TEST_CONSTEXPR(match_v16si(
   _mm512_dpwssd_epi32(
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
-TEST_CONSTEXPR(match_v16si(
-  _mm512_dpwssd_epi32(
-    (__m512i)(__v16si){10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12));
-TEST_CONSTEXPR(match_v16si(
-  _mm512_dpwssd_epi32(
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v32hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2));
-TEST_CONSTEXPR(match_v16si(
-  _mm512_dpwssd_epi32(
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
-    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
-  2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578));
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32},
+    (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}),
+  103, 207, 311, 415, 519, 623, 727, 831, 935, 1039, 1143, 1247, 1351, 1455, 1559, 1663));
 TEST_CONSTEXPR(match_v16si(
   _mm512_dpwssd_epi32(
     (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
-    (__m512i)(__v32hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0},
-    (__m512i)(__v32hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+    (__m512i)(__v32hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0},
+    (__m512i)(__v32hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
@@ -199,9 +159,9 @@ TEST_CONSTEXPR(match_v16si(
   _mm512_mask_dpwssds_epi32(
     (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
     (__mmask16)0xAAAA,
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  100, 202, 300, 402, 500, 602, 700, 802, 900, 1002, 1100, 1202, 1300, 1402, 1500, 1602));
+    (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32},
+    (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}),
+  100, 207, 300, 415, 500, 623, 700, 831, 900, 1039, 1100, 1247, 1300, 1455, 1500, 1663));
 
 __m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_dpwssds_epi32
@@ -212,10 +172,10 @@ __m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_dpwssds_epi32(
     (__mmask16)0xFFFF,
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32},
+    (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}),
+  103, 207, 311, 415, 519, 623, 727, 831, 935, 1039, 1143, 1247, 1351, 1455, 1559, 1663));
 
 __m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_dpwssds_epi32
@@ -224,20 +184,13 @@ __m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
 }
 TEST_CONSTEXPR(match_v16si(
   _mm512_dpwssds_epi32(
-    (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
+    (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+    (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32},
+    (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}),
+  103, 207, 311, 415, 519, 623, 727, 831, 935, 1039, 1143, 1247, 1351, 1455, 1559, 1663));
 TEST_CONSTEXPR(match_v16si(
   _mm512_dpwssds_epi32(
     (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
-    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
-    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+    (__m512i)(__v32hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767},
+    (__m512i)(__v32hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
-TEST_CONSTEXPR(match_v16si(
-  _mm512_dpwssds_epi32(
-    (__m512i)(__v16si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1},
-    (__m512i)(__v32hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768},
-    (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
-  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
-
diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c
index 7bf4d563f7ba2..d1a4c490d0a66 100644
--- a/clang/test/CodeGen/X86/avxvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avxvnni-builtins.c
@@ -18,21 +18,15 @@ __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4, 4, 4, 4, 4));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpbusd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
-    ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
-  -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
+    ((__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32})),
+  110, 226, 342, 458, 574, 690, 806, 922));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusd_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
-    ((__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+    ((__m256i)(__v32qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
+    ((__m256i)(__v32qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
@@ -42,15 +36,15 @@ __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusds_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4, 4, 4, 4, 4));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
+    ((__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32})),
+  110, 226, 342, 458, 574, 690, 806, 922));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusds_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+    ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
+    ((__m256i)(__v32qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
@@ -60,21 +54,15 @@ __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2, 2, 2, 2, 2));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpwssd_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  -2, -2, -2, -2, -2, -2, -2, -2));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}),
+    ((__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16})),
+  103, 207, 311, 415, 519, 623, 727, 831));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssd_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
-    ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0})),
+    ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
+    ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
@@ -84,15 +72,15 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssds_epi32(
-    ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2, 2, 2, 2, 2));
+    ((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
+    ((__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}),
+    ((__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16})),
+  103, 207, 311, 415, 519, 623, 727, 831));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssds_epi32(
     ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
-    ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -102,21 +90,15 @@ __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpbusd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
-    ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
-  -1020, -1020, -1020, -1020));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
+    ((__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16})),
+  1010, 2026, 3042, 4058));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusd_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
-    ((__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+    ((__m128i)(__v16qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
+    ((__m128i)(__v16qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -126,15 +108,15 @@ __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusds_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
-  4, 4, 4, 4));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
+    ((__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16})),
+  1010, 2026, 3042, 4058));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusds_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+    ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
+    ((__m128i)(__v16qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -144,21 +126,15 @@ __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpwssd_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
-  2147352578, 2147352578, 2147352578, 2147352578));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}),
+    ((__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8})),
+  1003, 2007, 3011, 4015));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssd_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
-    ((__m128i)(__v8hi){1,0,1,0,1,0,1,0})),
+    ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0}),
+    ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -168,15 +144,15 @@ __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssds_epi32(
-    ((__m128i)(__v4si){0, 0, 0, 0}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-    ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
-  2, 2, 2, 2));
+    ((__m128i)(__v4si){1000, 2000, 3000, 4000}),
+    ((__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}),
+    ((__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8})),
+  1003, 2007, 3011, 4015));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssds_epi32(
     ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
-    ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647));
 
 __m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
@@ -186,21 +162,15 @@ __m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusd_avx_epi32(
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 4, 4, 4, 4));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpbusd_avx_epi32(
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
-    (__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
-  -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
+    (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}),
+  110, 226, 342, 458, 574, 690, 806, 922));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusd_avx_epi32(
     (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
-    (__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0},
-    (__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+    (__m256i)(__v32qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
+    (__m256i)(__v32qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
@@ -210,15 +180,15 @@ __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusds_avx_epi32(
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4, 4, 4, 4, 4));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
+    (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}),
+  110, 226, 342, 458, 574, 690, 806, 922));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpbusds_avx_epi32(
     (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
-    (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
+    (__m256i)(__v32qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
@@ -228,21 +198,15 @@ __m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssd_avx_epi32(
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 2, 2, 2, 2));
-TEST_CONSTEXPR(match_v8si(
-  _mm256_dpwssd_avx_epi32(
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  -2, -2, -2, -2, -2, -2, -2, -2));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16},
+    (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}),
+  103, 207, 311, 415, 519, 623, 727, 831));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssd_avx_epi32(
     (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
-    (__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0},
-    (__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+    (__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0},
+    (__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
@@ -252,15 +216,15 @@ __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssds_avx_epi32(
-    (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2, 2, 2, 2, 2));
+    (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+    (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16},
+    (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}),
+  103, 207, 311, 415, 519, 623, 727, 831));
 TEST_CONSTEXPR(match_v8si(
   _mm256_dpwssds_avx_epi32(
     (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
-    (__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
-    (__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+    (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767},
+    (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -270,21 +234,15 @@ __m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusd_avx_epi32(
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpbusd_avx_epi32(
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
-    (__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
-  -1020, -1020, -1020, -1020));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
+    (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}),
+  1010, 2026, 3042, 4058));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusd_avx_epi32(
     (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
-    (__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0},
-    (__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+    (__m128i)(__v16qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
+    (__m128i)(__v16qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -294,15 +252,15 @@ __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusds_avx_epi32(
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
-  4, 4, 4, 4));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
+    (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}),
+  1010, 2026, 3042, 4058));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpbusds_avx_epi32(
     (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
-    (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
-    (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+    (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
+    (__m128i)(__v16qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}),
   2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -312,21 +270,15 @@ __m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssd_avx_epi32(
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2));
-TEST_CONSTEXPR(match_v4si(
-  _mm_dpwssd_avx_epi32(
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767},
-    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
-  2147352578, 2147352578, 2147352578, 2147352578));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8},
+    (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}),
+  1003, 2007, 3011, 4015));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssd_avx_epi32(
     (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
-    (__m128i)(__v8hi){1,0,1,0,1,0,1,0},
-    (__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
+    (__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0},
+    (__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
 __m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
@@ -336,13 +288,13 @@ __m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssds_avx_epi32(
-    (__m128i)(__v4si){0, 0, 0, 0},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
-    (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
-  2, 2, 2, 2));
+    (__m128i)(__v4si){1000, 2000, 3000, 4000},
+    (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8},
+    (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}),
+  1003, 2007, 3011, 4015));
 TEST_CONSTEXPR(match_v4si(
   _mm_dpwssds_avx_epi32(
     (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
-    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767},
-    (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+    (__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767},
+    (__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767}),
   2147483647, 2147483647, 2147483647, 2147483647));

>From 03db3a3568c594ef8de1a4d02915ab5aa624c78d Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Thu, 18 Jun 2026 09:09:32 -0500
Subject: [PATCH 10/11] Change EvaluateAsRValue to EvaluateVector

---
 clang/lib/AST/ExprConstant.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a911c775f0c74..b1506667f4e31 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12409,9 +12409,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
   auto EvalVectorDotProduct = [&](bool IsSaturating) -> bool {
     APValue Source, OperandA, OperandB;
-    if (!EvaluateAsRValue(Info, E->getArg(0), Source) ||
-        !EvaluateAsRValue(Info, E->getArg(1), OperandA) ||
-        !EvaluateAsRValue(Info, E->getArg(2), OperandB)) {
+    if (!EvaluateVector(E->getArg(0), Source, Info) ||
+        !EvaluateVector(E->getArg(1), OperandA, Info) ||
+        !EvaluateVector(E->getArg(2), OperandB, Info)) {
       return false;
     }
 

>From 6729a08c539c3be10332fde76968ffd056d615cd Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Thu, 18 Jun 2026 09:49:41 -0500
Subject: [PATCH 11/11] Add min max tests

---
 .../test/CodeGen/X86/avx512vlvnni-builtins.c  |  73 +++++++++++
 clang/test/CodeGen/X86/avx512vnni-builtins.c  |  24 ++++
 clang/test/CodeGen/X86/avxvnni-builtins.c     | 120 ++++++++++++++++++
 3 files changed, 217 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
index 87568156486de..71452d6078a43 100644
--- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
@@ -57,6 +57,13 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v32qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
 
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusd_epi32(
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v32qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m256i)(__v32qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  2147353087, 2147353088, 2147483647, -2147483647-1, -2147354109, -2147354108, 2147483647, -2147483647-1));
+
 __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpbusds_epi32
   // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
@@ -102,6 +109,12 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
     ((__m256i)(__v32qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_dpbusds_epi32(
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v32qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m256i)(__v32qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  2147353087, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147354108, 2147483647, -2147483647-1));
 
 __m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpwssd_epi32
@@ -148,6 +161,12 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
     ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX/INT_MIN accumulators with s16 operands 32767 or -32768.
+  _mm256_dpwssd_epi32( // No saturation: a lane adds two s16 products (-2147418112, 2147352578 or 2147483648 in total), wrapping mod 2^32.
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m256i)(__v16hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  65535, 65536, 65535, 65536, -131071, -131070, -1, 0));
 
 __m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_dpwssds_epi32
@@ -194,6 +213,12 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
     ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX/INT_MIN accumulators with s16 operands 32767 or -32768.
+  _mm256_dpwssds_epi32( // Saturating: sums past INT_MIN/INT_MAX clamp; the in-range sums (65535, -131070, 0) are exact.
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m256i)(__v16hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  65535, -2147483647-1, 65535, -2147483647-1, 2147483647, -131070, 2147483647, 0));
 
 __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpbusd_epi32
@@ -240,6 +265,18 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v16qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
     ((__m128i)(__v16qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 -128 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusd_epi32( // No saturation: lanes 0-1 add 4*255*(-128) = -130560, so INT_MIN wraps to 2147353088; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127})),
+  2147353087, 2147353088, 2147483647, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 127 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusd_epi32( // No saturation: lanes 0-1 add 4*255*127 = 129540, so INT_MAX wraps to -2147354109; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  -2147354109, -2147354108, 2147483647, -2147483647-1));
 
 __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpbusds_epi32
@@ -286,6 +323,18 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
     ((__m128i)(__v16qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 -128 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusds_epi32( // Saturating: lanes 0-1 add 4*255*(-128) = -130560, so INT_MIN stays clamped at INT_MIN; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127})),
+  2147353087, -2147483647-1, 2147483647, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 127 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusds_epi32( // Saturating: lanes 0-1 add 4*255*127 = 129540, so INT_MAX stays clamped at INT_MAX; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  2147483647, -2147354108, 2147483647, -2147483647-1));
 
 __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpwssd_epi32
@@ -332,6 +381,18 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0}),
     ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 and -32768 with opposite signs in every lane.
+  _mm_dpwssd_epi32( // No saturation: every lane adds 2*32767*(-32768) = -2147418112, so INT_MIN wraps to 65536.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767})),
+  65535, 65536, 65535, 65536));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 or -32768, identical in both sources.
+  _mm_dpwssd_epi32( // No saturation: lanes 0-1 add 2*32767*32767 = 2147352578, lanes 2-3 add 2*32768*32768 = 2147483648, wrapping mod 2^32.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  -131071, -131070, -1, 0));
 
 __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_dpwssds_epi32
@@ -378,3 +439,15 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767}),
     ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 and -32768 with opposite signs in every lane.
+  _mm_dpwssds_epi32( // Saturating: every lane adds 2*32767*(-32768) = -2147418112, so INT_MAX drops to 65535 and INT_MIN clamps at INT_MIN.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767})),
+  65535, -2147483647-1, 65535, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 or -32768, identical in both sources.
+  _mm_dpwssds_epi32( // Saturating: lanes 0 and 2 clamp at INT_MAX; INT_MIN + 2147352578 = -131070 and INT_MIN + 2147483648 = 0 are exact.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  2147483647, -131070, 2147483647, 0));
diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c
index 8a38cf28e360c..b0c0c9828e7b3 100644
--- a/clang/test/CodeGen/X86/avx512vnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c
@@ -56,6 +56,12 @@ TEST_CONSTEXPR(match_v16si(
     (__m512i)(__v64qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
     (__m512i)(__v64qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v16si( // Extremes: INT_MAX/INT_MIN accumulators, u8 bytes 255 or 0, s8 bytes -128 or 127.
+  _mm512_dpbusd_epi32( // No saturation: a lane adds 4*255*(-128) = -130560, 4*255*127 = 129540, or 0, wrapping mod 2^32.
+    (__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1},
+    (__m512i)(__v64qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0},
+    (__m512i)(__v64qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128}),
+  2147353087, 2147353088, 2147483647, -2147483647-1, -2147354109, -2147354108, 2147483647, -2147483647-1, 2147353087, 2147353088, 2147483647, -2147483647-1, -2147354109, -2147354108, 2147483647, -2147483647-1));
 
 __m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpbusds_epi32
@@ -102,6 +108,12 @@ TEST_CONSTEXPR(match_v16si(
     (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64},
     (__m512i)(__v64qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v16si( // Extremes: INT_MAX/INT_MIN accumulators, u8 bytes 255 or 0, s8 bytes -128 or 127.
+  _mm512_dpbusds_epi32( // Saturating: INT_MIN - 130560 clamps at INT_MIN and INT_MAX + 129540 clamps at INT_MAX; in-range sums are exact.
+    (__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1},
+    (__m512i)(__v64qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0},
+    (__m512i)(__v64qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128}),
+  2147353087, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147354108, 2147483647, -2147483647-1, 2147353087, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147354108, 2147483647, -2147483647-1));
 
 __m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpwssd_epi32
@@ -148,6 +160,12 @@ TEST_CONSTEXPR(match_v16si(
     (__m512i)(__v32hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0},
     (__m512i)(__v32hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v16si( // Extremes: INT_MAX/INT_MIN accumulators with s16 operands 32767 or -32768.
+  _mm512_dpwssd_epi32( // No saturation: a lane adds two s16 products (-2147418112, 2147352578 or 2147483648 in total), wrapping mod 2^32.
+    (__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1},
+    (__m512i)(__v32hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768},
+    (__m512i)(__v32hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+  65535, 65536, 65535, 65536, -131071, -131070, -1, 0, 65535, 65536, 65535, 65536, -131071, -131070, -1, 0));
 
 __m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_dpwssds_epi32
@@ -194,3 +212,9 @@ TEST_CONSTEXPR(match_v16si(
     (__m512i)(__v32hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767},
     (__m512i)(__v32hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v16si( // Extremes: INT_MAX/INT_MIN accumulators with s16 operands 32767 or -32768.
+  _mm512_dpwssds_epi32( // Saturating: sums past INT_MIN/INT_MAX clamp; the in-range sums (65535, -131070, 0) are exact.
+    (__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1},
+    (__m512i)(__v32hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768},
+    (__m512i)(__v32hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+  65535, -2147483647-1, 65535, -2147483647-1, 2147483647, -131070, 2147483647, 0, 65535, -2147483647-1, 65535, -2147483647-1, 2147483647, -131070, 2147483647, 0));
diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c
index d1a4c490d0a66..903aa7681159e 100644
--- a/clang/test/CodeGen/X86/avxvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avxvnni-builtins.c
@@ -28,6 +28,12 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v32qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
     ((__m256i)(__v32qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX/INT_MIN accumulators, u8 bytes 255 or 0, s8 bytes -128 or 127.
+  _mm256_dpbusd_epi32( // No saturation: a lane adds 4*255*(-128) = -130560, 4*255*127 = 129540, or 0, wrapping mod 2^32.
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v32qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m256i)(__v32qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  2147353087, 2147353088, 2147483647, -2147483647-1, -2147354109, -2147354108, 2147483647, -2147483647-1));
 
 __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusds_epi32
@@ -46,6 +52,12 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}),
     ((__m256i)(__v32qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX/INT_MIN accumulators, u8 bytes 255 or 0, s8 bytes -128 or 127.
+  _mm256_dpbusds_epi32( // Saturating: INT_MIN - 130560 clamps at INT_MIN and INT_MAX + 129540 clamps at INT_MAX; in-range sums are exact.
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v32qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m256i)(__v32qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  2147353087, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147354108, 2147483647, -2147483647-1));
 
 __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssd_epi32
@@ -64,6 +76,12 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
     ((__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX/INT_MIN accumulators with s16 operands 32767 or -32768.
+  _mm256_dpwssd_epi32( // No saturation: a lane adds two s16 products (-2147418112, 2147352578 or 2147483648 in total), wrapping mod 2^32.
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m256i)(__v16hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  65535, 65536, 65535, 65536, -131071, -131070, -1, 0));
 
 __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssds_epi32
@@ -82,6 +100,12 @@ TEST_CONSTEXPR(match_v8si(
     ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
     ((__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX/INT_MIN accumulators with s16 operands 32767 or -32768.
+  _mm256_dpwssds_epi32( // Saturating: sums past INT_MIN/INT_MAX clamp; the in-range sums (65535, -131070, 0) are exact.
+    ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m256i)(__v16hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m256i)(__v16hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  65535, -2147483647-1, 65535, -2147483647-1, 2147483647, -131070, 2147483647, 0));
 
 __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusd_epi32
@@ -100,6 +124,18 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v16qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
     ((__m128i)(__v16qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 -128 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusd_epi32( // No saturation: lanes 0-1 add 4*255*(-128) = -130560, so INT_MIN wraps to 2147353088; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127})),
+  2147353087, 2147353088, 2147483647, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 127 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusd_epi32( // No saturation: lanes 0-1 add 4*255*127 = 129540, so INT_MAX wraps to -2147354109; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  -2147354109, -2147354108, 2147483647, -2147483647-1));
 
 __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusds_epi32
@@ -118,6 +154,18 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}),
     ((__m128i)(__v16qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1})),
   2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 -128 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusds_epi32( // Saturating: lanes 0-1 add 4*255*(-128) = -130560, so INT_MIN stays clamped at INT_MIN; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127})),
+  2147353087, -2147483647-1, 2147483647, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, u8 255 with s8 127 in lanes 0-1, u8 0 in lanes 2-3.
+  _mm_dpbusds_epi32( // Saturating: lanes 0-1 add 4*255*127 = 129540, so INT_MAX stays clamped at INT_MAX; lanes 2-3 add 0.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0}),
+    ((__m128i)(__v16qi){127,127,127,127, 127,127,127,127, -128,-128,-128,-128, -128,-128,-128,-128})),
+  2147483647, -2147354108, 2147483647, -2147483647-1));
 
 __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssd_epi32
@@ -136,6 +184,18 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0}),
     ((__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0})),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 and -32768 with opposite signs in every lane.
+  _mm_dpwssd_epi32( // No saturation: every lane adds 2*32767*(-32768) = -2147418112, so INT_MIN wraps to 65536.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767})),
+  65535, 65536, 65535, 65536));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 or -32768, identical in both sources.
+  _mm_dpwssd_epi32( // No saturation: lanes 0-1 add 2*32767*32767 = 2147352578, lanes 2-3 add 2*32768*32768 = 2147483648, wrapping mod 2^32.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  -131071, -131070, -1, 0));
 
 __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssds_epi32
@@ -154,6 +214,18 @@ TEST_CONSTEXPR(match_v4si(
     ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767}),
     ((__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767})),
   2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 and -32768 with opposite signs in every lane.
+  _mm_dpwssds_epi32( // Saturating: every lane adds 2*32767*(-32768) = -2147418112, so INT_MAX drops to 65535 and INT_MIN clamps at INT_MIN.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){-32768,-32768, -32768,-32768, 32767,32767, 32767,32767})),
+  65535, -2147483647-1, 65535, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX/INT_MIN accumulators, s16 operands 32767 or -32768, identical in both sources.
+  _mm_dpwssds_epi32( // Saturating: lanes 0 and 2 clamp at INT_MAX; INT_MIN + 2147352578 = -131070 and INT_MIN + 2147483648 = 0 are exact.
+    ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+    ((__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768})),
+  2147483647, -131070, 2147483647, 0));
 
 __m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusd_avx_epi32
@@ -172,6 +244,12 @@ TEST_CONSTEXPR(match_v8si(
     (__m256i)(__v32qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
     (__m256i)(__v32qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MIN accumulators with s8 -128 in lanes 0-3, INT_MAX with s8 127 in lanes 4-7; every u8 byte is 255.
+  _mm256_dpbusd_avx_epi32( // No saturation: INT_MIN - 130560 wraps to 2147353088 and INT_MAX + 129540 wraps to -2147354109.
+    (__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v32qu){255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255},
+    (__m256i)(__v32qi){-128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127}),
+  2147353088, 2147353088, 2147353088, 2147353088, -2147354109, -2147354109, -2147354109, -2147354109));
 
 __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpbusds_avx_epi32
@@ -190,6 +268,12 @@ TEST_CONSTEXPR(match_v8si(
     (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32},
     (__m256i)(__v32qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MIN accumulators with s8 -128 in lanes 0-3, INT_MAX with s8 127 in lanes 4-7; every u8 byte is 255.
+  _mm256_dpbusds_avx_epi32( // Saturating: every lane would overflow, so each result clamps back to its INT_MIN/INT_MAX accumulator.
+    (__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v32qu){255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255},
+    (__m256i)(__v32qi){-128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127, 127,127,127,127, 127,127,127,127}),
+  -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssd_avx_epi32
@@ -208,6 +292,12 @@ TEST_CONSTEXPR(match_v8si(
     (__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0},
     (__m256i)(__v16hi){1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0, 1,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX accumulators; both s16 sources are 32767 in lanes 0-3 and -32768 in lanes 4-7.
+  _mm256_dpwssd_avx_epi32( // No saturation: INT_MAX + 2*32767*32767 wraps to -131071 and INT_MAX + 2*32768*32768 wraps to -1.
+    (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768},
+    (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768}),
+  -131071, -131071, -131071, -131071, -1, -1, -1, -1));
 
 __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_dpwssds_avx_epi32
@@ -226,6 +316,12 @@ TEST_CONSTEXPR(match_v8si(
     (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767},
     (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767, 32767,32767}),
   2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si( // Extremes: INT_MAX accumulators; both s16 sources are 32767 in lanes 0-3 and -32768 in lanes 4-7.
+  _mm256_dpwssds_avx_epi32( // Saturating: every lane adds a positive sum to INT_MAX, so all results clamp at INT_MAX.
+    (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+    (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768},
+    (__m256i)(__v16hi){32767,32767, 32767,32767, 32767,32767, 32767,32767, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768}),
+  2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
 
 __m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusd_avx_epi32
@@ -244,6 +340,12 @@ TEST_CONSTEXPR(match_v4si(
     (__m128i)(__v16qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
     (__m128i)(__v16qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MIN accumulators with s8 -128 in lanes 0-1, INT_MAX with s8 127 in lanes 2-3; every u8 byte is 255.
+  _mm_dpbusd_avx_epi32( // No saturation: INT_MIN - 130560 wraps to 2147353088 and INT_MAX + 129540 wraps to -2147354109.
+    (__m128i)(__v4si){-2147483647-1, -2147483647-1, 2147483647, 2147483647},
+    (__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255},
+    (__m128i)(__v16qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127}),
+  2147353088, 2147353088, -2147354109, -2147354109));
 
 __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpbusds_avx_epi32
@@ -262,6 +364,12 @@ TEST_CONSTEXPR(match_v4si(
     (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16},
     (__m128i)(__v16qi){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}),
   2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MIN accumulators with s8 -128 in lanes 0-1, INT_MAX with s8 127 in lanes 2-3; every u8 byte is 255.
+  _mm_dpbusds_avx_epi32( // Saturating: every lane would overflow, so each result clamps back to its INT_MIN/INT_MAX accumulator.
+    (__m128i)(__v4si){-2147483647-1, -2147483647-1, 2147483647, 2147483647},
+    (__m128i)(__v16qu){255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255},
+    (__m128i)(__v16qi){-128,-128,-128,-128, -128,-128,-128,-128, 127,127,127,127, 127,127,127,127}),
+  -2147483647-1, -2147483647-1, 2147483647, 2147483647));
 
 __m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssd_avx_epi32
@@ -280,6 +388,12 @@ TEST_CONSTEXPR(match_v4si(
     (__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0},
     (__m128i)(__v8hi){1,0, 1,0, 1,0, 1,0}),
   -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX accumulators; both s16 sources are 32767 in lanes 0-1 and -32768 in lanes 2-3.
+  _mm_dpwssd_avx_epi32( // No saturation: INT_MAX + 2*32767*32767 wraps to -131071 and INT_MAX + 2*32768*32768 wraps to -1.
+    (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+    (__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768},
+    (__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+  -131071, -131071, -1, -1));
 
 __m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_dpwssds_avx_epi32
@@ -298,3 +412,9 @@ TEST_CONSTEXPR(match_v4si(
     (__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767},
     (__m128i)(__v8hi){32767,32767, 32767,32767, 32767,32767, 32767,32767}),
   2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si( // Extremes: INT_MAX accumulators; both s16 sources are 32767 in lanes 0-1 and -32768 in lanes 2-3.
+  _mm_dpwssds_avx_epi32( // Saturating: every lane adds a positive sum to INT_MAX, so all results clamp at INT_MAX.
+    (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+    (__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768},
+    (__m128i)(__v8hi){32767,32767, 32767,32767, -32768,-32768, -32768,-32768}),
+  2147483647, 2147483647, 2147483647, 2147483647));



More information about the cfe-commits mailing list