[clang] [Clang][X86] Add constexpr support for VNNI intrinsics [#161430] (PR #190533)
Akash Deo via cfe-commits
cfe-commits at lists.llvm.org
Sun Apr 5 08:50:00 PDT 2026
https://github.com/AkashDeoNU created https://github.com/llvm/llvm-project/pull/190533
This fixes Issue #161430 by adding constexpr support for a few VNNI intrinsics - sorry about the delay - let me know if there are any issues and I'll fix them
>From fa66c60f2375e5c8e8c5bf45e87dfd7fa05261c1 Mon Sep 17 00:00:00 2001
From: Akash Deo <AkashDeo2025 at u.Northwestern.edu>
Date: Sun, 5 Apr 2026 10:46:27 -0500
Subject: [PATCH] [Clang][X86] Add constexpr support for VNNI intrinsics
---
clang/include/clang/Basic/BuiltinsX86.td | 24 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 91 ++++++
clang/lib/AST/ExprConstant.cpp | 80 +++++
clang/lib/Headers/avx512vlvnniintrin.h | 90 +++---
clang/lib/Headers/avx512vnniintrin.h | 73 ++---
clang/lib/Headers/avxvnniintrin.h | 48 +--
.../test/CodeGen/X86/avx512vlvnni-builtins.c | 298 ++++++++++++++++++
clang/test/CodeGen/X86/avx512vnni-builtins.c | 156 +++++++++
clang/test/CodeGen/X86/avxvnni-builtins.c | 246 +++++++++++++++
9 files changed, 986 insertions(+), 120 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 0cab8c77d465d..342a23e1f2aab 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1075,51 +1075,51 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
}
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
}
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index e7b3ef6ce1510..03c9add584658 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4189,6 +4189,53 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, bool IsDottingWord,
+ bool IsSaturating) {
+ const auto *SrcVecT = Call->getArg(0)->getType()->castAs<VectorType>();
+ const auto *OpAVecT = Call->getArg(1)->getType()->castAs<VectorType>();
+ const auto *OpBVecT = Call->getArg(2)->getType()->castAs<VectorType>();
+
+ PrimType SrcElemT = *S.getContext().classify(SrcVecT->getElementType());
+ PrimType OpAElemT = *S.getContext().classify(OpAVecT->getElementType());
+ PrimType OpBElemT = *S.getContext().classify(OpBVecT->getElementType());
+
+ unsigned NumElements = SrcVecT->getNumElements();
+ unsigned Iters = IsDottingWord ? 2 : 4;
+
+ const Pointer &OpBPtr = S.Stk.pop<Pointer>();
+ const Pointer &OpAPtr = S.Stk.pop<Pointer>();
+ const Pointer &SrcPtr = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ for (unsigned I = 0; I < NumElements; ++I) {
+ APSInt Acc;
+ INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem<T>(I).toAPSInt(); });
+ Acc = Acc.sext(64);
+ for (unsigned J = 0; J < Iters; ++J) {
+ APSInt OpA, OpB;
+ INT_TYPE_SWITCH_NO_BOOL(
+ OpAElemT, { OpA = OpAPtr.elem<T>(Iters * I + J).toAPSInt(); });
+ INT_TYPE_SWITCH_NO_BOOL(
+ OpBElemT, { OpB = OpBPtr.elem<T>(Iters * I + J).toAPSInt(); });
+ if (IsDottingWord) {
+ OpA = APSInt(OpA.sext(64), false);
+ } else {
+ OpA = APSInt(OpA.zext(64), false);
+ }
+ OpB = APSInt(OpB.sext(64), false);
+ Acc += OpA * OpB;
+ }
+ if (IsSaturating) {
+ Acc = APSInt(Acc.truncSSat(32), false);
+ }
+ INT_TYPE_SWITCH_NO_BOOL(SrcElemT,
+ { Dst.elem<T>(I) = static_cast<T>(Acc); });
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -6049,6 +6096,50 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false);
},
/*IsScalar=*/true);
+ case X86::BI__builtin_ia32_vpdpwssd128:
+ case X86::BI__builtin_ia32_vpdpwssd256:
+ case X86::BI__builtin_ia32_vpdpwssd512:
+ case X86::BI__builtin_ia32_vpdpwssds128:
+ case X86::BI__builtin_ia32_vpdpwssds256:
+ case X86::BI__builtin_ia32_vpdpwssds512:
+ case X86::BI__builtin_ia32_vpdpbusds128:
+ case X86::BI__builtin_ia32_vpdpbusds256:
+ case X86::BI__builtin_ia32_vpdpbusds512:
+ case X86::BI__builtin_ia32_vpdpbusd128:
+ case X86::BI__builtin_ia32_vpdpbusd256:
+ case X86::BI__builtin_ia32_vpdpbusd512: {
+ unsigned BuiltinID = Call->getBuiltinCallee();
+ bool IsDottingWord;
+ bool IsSaturating;
+ switch (BuiltinID) {
+ case X86::BI__builtin_ia32_vpdpwssd128:
+ case X86::BI__builtin_ia32_vpdpwssd256:
+ case X86::BI__builtin_ia32_vpdpwssd512:
+ IsDottingWord = true;
+ IsSaturating = false;
+ break;
+ case X86::BI__builtin_ia32_vpdpwssds128:
+ case X86::BI__builtin_ia32_vpdpwssds256:
+ case X86::BI__builtin_ia32_vpdpwssds512:
+ IsDottingWord = true;
+ IsSaturating = true;
+ break;
+ case X86::BI__builtin_ia32_vpdpbusds128:
+ case X86::BI__builtin_ia32_vpdpbusds256:
+ case X86::BI__builtin_ia32_vpdpbusds512:
+ IsDottingWord = false;
+ IsSaturating = true;
+ break;
+ case X86::BI__builtin_ia32_vpdpbusd128:
+ case X86::BI__builtin_ia32_vpdpbusd256:
+ case X86::BI__builtin_ia32_vpdpbusd512:
+ IsDottingWord = false;
+ IsSaturating = false;
+ break;
+ }
+ return interp__builtin_ia32_vpdp(S, OpPC, Call, IsDottingWord,
+ IsSaturating);
+ }
default:
S.FFDiag(S.Current->getLocation(OpPC),
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4f45fa728c605..ecbdb8cac301d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14634,6 +14634,86 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_vpdpwssd128:
+ case X86::BI__builtin_ia32_vpdpwssd256:
+ case X86::BI__builtin_ia32_vpdpwssd512:
+ case X86::BI__builtin_ia32_vpdpwssds128:
+ case X86::BI__builtin_ia32_vpdpwssds256:
+ case X86::BI__builtin_ia32_vpdpwssds512:
+ case X86::BI__builtin_ia32_vpdpbusds128:
+ case X86::BI__builtin_ia32_vpdpbusds256:
+ case X86::BI__builtin_ia32_vpdpbusds512:
+ case X86::BI__builtin_ia32_vpdpbusd128:
+ case X86::BI__builtin_ia32_vpdpbusd256:
+ case X86::BI__builtin_ia32_vpdpbusd512: {
+ unsigned BuiltinID = E->getBuiltinCallee();
+ bool IsDottingWord = false;
+ bool IsSaturating = false;
+ switch (BuiltinID) {
+ case X86::BI__builtin_ia32_vpdpwssd128:
+ case X86::BI__builtin_ia32_vpdpwssd256:
+ case X86::BI__builtin_ia32_vpdpwssd512:
+ IsDottingWord = true;
+ IsSaturating = false;
+ break;
+ case X86::BI__builtin_ia32_vpdpwssds128:
+ case X86::BI__builtin_ia32_vpdpwssds256:
+ case X86::BI__builtin_ia32_vpdpwssds512:
+ IsDottingWord = true;
+ IsSaturating = true;
+ break;
+ case X86::BI__builtin_ia32_vpdpbusds128:
+ case X86::BI__builtin_ia32_vpdpbusds256:
+ case X86::BI__builtin_ia32_vpdpbusds512:
+ IsDottingWord = false;
+ IsSaturating = true;
+ break;
+ case X86::BI__builtin_ia32_vpdpbusd128:
+ case X86::BI__builtin_ia32_vpdpbusd256:
+ case X86::BI__builtin_ia32_vpdpbusd512:
+ IsDottingWord = false;
+ IsSaturating = false;
+ break;
+ }
+
+ APValue Source, OperandA, OperandB;
+ if (!EvaluateAsRValue(Info, E->getArg(0), Source) ||
+ !EvaluateAsRValue(Info, E->getArg(1), OperandA) ||
+ !EvaluateAsRValue(Info, E->getArg(2), OperandB)) {
+ return false;
+ }
+
+ unsigned NumElements = Source.getVectorLength();
+
+ SmallVector<APValue, 16> Result;
+ Result.reserve(NumElements);
+ unsigned Iters = IsDottingWord ? 2 : 4;
+ for (unsigned I = 0; I < NumElements; ++I) {
+ APSInt DotProduct = Source.getVectorElt(I).getInt();
+ DotProduct = DotProduct.sext(64);
+ for (unsigned J = 0; J < Iters; ++J) {
+ APSInt OpA;
+ if (IsDottingWord) {
+ OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().sext(64),
+ false);
+ } else {
+ OpA = APSInt(OperandA.getVectorElt(Iters * I + J).getInt().zext(64),
+ false);
+ }
+ APSInt OpB = APSInt(
+ OperandB.getVectorElt(Iters * I + J).getInt().sext(64), false);
+ DotProduct += OpA * OpB;
+ }
+ if (IsSaturating) {
+ DotProduct = APSInt(DotProduct.truncSSat(32), false);
+ } else {
+ DotProduct = APSInt(DotProduct.trunc(32), false);
+ }
+ Result.push_back(APValue(DotProduct));
+ }
+
+ return Success(APValue(Result.data(), Result.size()), E);
+ }
}
}
diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h
index 4b8a199af32e5..053807032fcb3 100644
--- a/clang/lib/Headers/avx512vlvnniintrin.h
+++ b/clang/lib/Headers/avx512vlvnniintrin.h
@@ -24,6 +24,14 @@
__target__("avx512vl,avx512vnni"), \
__min_vector_width__(256)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
@@ -179,129 +187,115 @@
#define _mm_dpwssds_epi32(S, A, B) \
((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
(__v8si)__S);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
(__v8si)__S);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
(__v8si)__S);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
(__v8si)__S);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
(__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpbusd_epi32(__S, __A, __B),
(__v4si)__S);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpbusd_epi32(__S, __A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpbusds_epi32(__S, __A, __B),
(__v4si)__S);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpbusds_epi32(__S, __A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpwssd_epi32(__S, __A, __B),
(__v4si)__S);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpwssd_epi32(__S, __A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpwssds_epi32(__S, __A, __B),
(__v4si)__S);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
(__v4si)_mm_dpwssds_epi32(__S, __A, __B),
(__v4si)_mm_setzero_si128());
diff --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h
index 2ce88efe4a04f..1aa431ed446b2 100644
--- a/clang/lib/Headers/avx512vnniintrin.h
+++ b/clang/lib/Headers/avx512vnniintrin.h
@@ -19,98 +19,99 @@
__attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), \
__min_vector_width__(512)))
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A,
(__v64qi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
(__v16si)__S);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A,
(__v64qi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
(__v16si)__S);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A,
(__v32hi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
(__v16si)__S);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A,
(__v32hi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
(__v16si)__S);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
(__v16si)_mm512_setzero_si512());
}
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif
diff --git a/clang/lib/Headers/avxvnniintrin.h b/clang/lib/Headers/avxvnniintrin.h
index 1d2e8c906effc..ee82676fcb392 100644
--- a/clang/lib/Headers/avxvnniintrin.h
+++ b/clang/lib/Headers/avxvnniintrin.h
@@ -43,6 +43,14 @@
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
@@ -60,9 +68,8 @@
/// ENDFOR
/// DST[MAX:256] := 0
/// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A,
(__v32qi)__B);
}
@@ -84,9 +91,8 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
/// ENDFOR
/// DST[MAX:256] := 0
/// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A,
(__v32qi)__B);
}
@@ -106,9 +112,8 @@ _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
/// ENDFOR
/// DST[MAX:256] := 0
/// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v16hi)__A,
(__v16hi)__B);
}
@@ -128,9 +133,8 @@ _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
/// ENDFOR
/// DST[MAX:256] := 0
/// \endcode
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v16hi)__A,
(__v16hi)__B);
}
@@ -152,9 +156,8 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
/// ENDFOR
/// DST[MAX:128] := 0
/// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A,
(__v16qi)__B);
}
@@ -176,9 +179,8 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
/// ENDFOR
/// DST[MAX:128] := 0
/// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A,
(__v16qi)__B);
}
@@ -198,9 +200,8 @@ _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
/// ENDFOR
/// DST[MAX:128] := 0
/// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v8hi)__A,
(__v8hi)__B);
}
@@ -220,9 +221,8 @@ _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
/// ENDFOR
/// DST[MAX:128] := 0
/// \endcode
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v8hi)__A,
(__v8hi)__B);
}
diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
index 11dbd717a9f77..8bdbdf1ca94a0 100644
--- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c
@@ -3,7 +3,13 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_dpbusd_epi32
@@ -11,6 +17,13 @@ __m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_dpbusd_epi32(
+ (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+ (__mmask8)0x55,
+ (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 104, 200, 304, 400, 504, 600, 704, 800));
__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_dpbusd_epi32
@@ -18,12 +31,43 @@ __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_dpbusd_epi32(
+ (__mmask8)0x0F,
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 0, 0, 0, 0));
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpbusd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_dpbusd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_epi32(
+ ((__m256i)(__v8si){10, 10, 10, 10, 10, 10, 10, 10}),
+ ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 14, 14, 14, 14, 14, 14, 14, 14));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+ ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+ -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+ ((__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_dpbusds_epi32
@@ -31,6 +75,13 @@ __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_dpbusds_epi32(
+ (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+ (__mmask8)0xAA,
+ (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 100, 204, 300, 404, 500, 604, 700, 804));
__m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_dpbusds_epi32
@@ -38,12 +89,37 @@ __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_dpbusds_epi32(
+ (__mmask8)0xFF,
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 4, 4, 4, 4));
__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpbusds_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_dpbusds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusds_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusds_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusds_epi32(
+ ((__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+ ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+ ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_dpwssd_epi32
@@ -51,6 +127,13 @@ __m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_dpwssd_epi32(
+ (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+ (__mmask8)0xF0,
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 100, 200, 300, 400, 502, 602, 702, 802));
__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_dpwssd_epi32
@@ -58,12 +141,49 @@ __m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_dpwssd_epi32(
+ (__mmask8)0x0F,
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 0, 0, 0, 0));
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){10, 10, 10, 10, 10, 10, 10, 10}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 12, 12, 12, 12, 12, 12, 12, 12));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+ ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+ 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+ ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_dpwssds_epi32
@@ -71,6 +191,13 @@ __m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_dpwssds_epi32(
+ (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800},
+ (__mmask8)0xAA,
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 100, 202, 300, 402, 500, 602, 700, 802));
__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_dpwssds_epi32
@@ -78,12 +205,37 @@ __m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_dpwssds_epi32(
+ (__mmask8)0xFF,
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 2, 2, 2, 2));
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssds_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssds_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssds_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+ ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssds_epi32(
+ ((__m256i)(__v8si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+ ((__m256i)(__v16hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768}),
+ ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_dpbusd_epi32
@@ -91,6 +243,13 @@ __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpbusd_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_dpbusd_epi32(
+ (__m128i)(__v4si){100, 200, 300, 400},
+ (__mmask8)0x05,
+ (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 104, 200, 304, 400));
__m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_dpbusd_epi32
@@ -98,12 +257,43 @@ __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_dpbusd_epi32(
+ (__mmask8)0x03,
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 0, 0));
__m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpbusd_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_dpbusd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_epi32(
+ ((__m128i)(__v4si){10, 10, 10, 10}),
+ ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 14, 14, 14, 14));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+ ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+ -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+ ((__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_dpbusds_epi32
@@ -111,6 +301,13 @@ __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpbusds_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_dpbusds_epi32(
+ (__m128i)(__v4si){100, 200, 300, 400},
+ (__mmask8)0x0A,
+ (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 100, 204, 300, 404));
__m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_dpbusds_epi32
@@ -118,12 +315,37 @@ __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_dpbusds_epi32(
+ (__mmask8)0x0F,
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4));
__m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpbusds_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_dpbusds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusds_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusds_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusds_epi32(
+ ((__m128i)(__v4si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+ ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+ ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_dpwssd_epi32
@@ -131,6 +353,13 @@ __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwssd_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_dpwssd_epi32(
+ (__m128i)(__v4si){100, 200, 300, 400},
+ (__mmask8)0x05,
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ 102, 200, 302, 400));
__m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_dpwssd_epi32
@@ -138,12 +367,49 @@ __m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_dpwssd_epi32(
+ (__mmask8)0x03,
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ 2, 2, 0, 0));
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssd_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){10, 10, 10, 10}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+ 12, 12, 12, 12));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v8hi){-1,-1,-1,-1,-1,-1,-1,-1}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+ -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+ 2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
+ ((__m128i)(__v8hi){1,0,1,0,1,0,1,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_dpwssds_epi32
@@ -151,6 +417,13 @@ __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwssds_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_dpwssds_epi32(
+ (__m128i)(__v4si){100, 200, 300, 400},
+ (__mmask8)0x0A,
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ 100, 202, 300, 402));
__m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_dpwssds_epi32
@@ -158,10 +431,35 @@ __m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_dpwssds_epi32(
+ (__mmask8)0x0F,
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2));
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssds_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssds_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssds_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+ 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssds_epi32(
+ ((__m128i)(__v4si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1}),
+ ((__m128i)(__v8hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c
index 6b8465206eedb..f8f663b48aa36 100644
--- a/clang/test/CodeGen/X86/avx512vnni-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c
@@ -3,7 +3,13 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_dpbusd_epi32
@@ -11,6 +17,13 @@ __m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_dpbusd_epi32(
+ (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+ (__mmask16)0x5555,
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 104, 200, 304, 400, 504, 600, 704, 800, 904, 1000, 1104, 1200, 1304, 1400, 1504, 1600));
__m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_dpbusd_epi32
@@ -18,6 +31,13 @@ __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_dpbusd_epi32(
+ (__mmask16)0x00FF,
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_dpbusd_epi32
@@ -25,12 +45,47 @@ __m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
return _mm512_dpbusd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4));
+
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
+
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ (__m512i)(__v64qu){1,2,3,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){5,6,7,8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 72, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
+
+TEST_CONSTEXPR(match_v16si(_mm512_dpbusd_epi32((__m512i)(__v16si){2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ (__m512i)(__v64qu){1,2,3,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){5,6,7,8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 72, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5));
+
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpbusd_epi32(
+ (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m512i)(__v64qu){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0},
+ (__m512i)(__v64qi){1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0, 1,0,0,0}),
+ -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648,
+ -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648, -2147483648));
+
__m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_dpbusds_epi32
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_dpbusds_epi32(
+ (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+ (__mmask16)0x5555,
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 104, 200, 304, 400, 504, 600, 704, 800, 904, 1000, 1104, 1200, 1304, 1400, 1504, 1600));
__m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_dpbusds_epi32
@@ -38,12 +93,37 @@ __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_dpbusds_epi32(
+ (__mmask16)0x00FF,
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_dpbusds_epi32
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
return _mm512_dpbusds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpbusds_epi32(
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpbusds_epi32(
+ (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m512i)(__v64qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v64qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpbusds_epi32(
+ (__m512i)(__v16si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1},
+ (__m512i)(__v64qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
+ (__m512i)(__v64qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_dpwssd_epi32
@@ -51,6 +131,13 @@ __m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_dpwssd_epi32(
+ (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+ (__mmask16)0xFF00,
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 100, 200, 300, 400, 500, 600, 700, 800, 902, 1002, 1102, 1202, 1302, 1402, 1502, 1602));
__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_dpwssd_epi32
@@ -58,12 +145,49 @@ __m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_dpwssd_epi32(
+ (__mmask16)0x000F,
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_dpwssd_epi32
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwssd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssd_epi32(
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssd_epi32(
+ (__m512i)(__v16si){10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssd_epi32(
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v32hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssd_epi32(
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
+ (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+ 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssd_epi32(
+ (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m512i)(__v32hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0},
+ (__m512i)(__v32hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_dpwssds_epi32
@@ -71,6 +195,13 @@ __m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, _
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_dpwssds_epi32(
+ (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600},
+ (__mmask16)0xAAAA,
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 100, 202, 300, 402, 500, 602, 700, 802, 900, 1002, 1100, 1202, 1300, 1402, 1500, 1602));
__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_dpwssds_epi32
@@ -78,10 +209,35 @@ __m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A,
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_dpwssds_epi32(
+ (__mmask16)0xFFFF,
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_dpwssds_epi32
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_dpwssds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssds_epi32(
+ (__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m512i)(__v32hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssds_epi32(
+ (__m512i)(__v16si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
+ (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_dpwssds_epi32(
+ (__m512i)(__v16si){-2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1},
+ (__m512i)(__v32hi){-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768},
+ (__m512i)(__v32hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c
index 6557a26807eb2..7bf4d563f7ba2 100644
--- a/clang/test/CodeGen/X86/avxvnni-builtins.c
+++ b/clang/test/CodeGen/X86/avxvnni-builtins.c
@@ -3,100 +3,346 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpbusd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_dpbusd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+ ((__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+ -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+ ((__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpbusds_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_dpbusds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusds_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusds_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+ ((__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssds_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssds_epi32(
+ ((__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssds_epi32(
+ ((__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+ ((__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767})),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
__m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpbusd_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_dpbusd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}),
+ ((__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1})),
+ -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+ ((__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpbusds_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_dpbusds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusds_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusds_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1})),
+ 2147483647, 2147483647, 2147483647, 2147483647));
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssd_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssd_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+ 2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
+ ((__m128i)(__v8hi){1,0,1,0,1,0,1,0})),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssds_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssds_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssds_epi32(
+ ((__m128i)(__v4si){0, 0, 0, 0}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ ((__m128i)(__v8hi){1,1,1,1,1,1,1,1})),
+ 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssds_epi32(
+ ((__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+ ((__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767})),
+ 2147483647, 2147483647, 2147483647, 2147483647));
__m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpbusd_avx_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_dpbusd_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_avx_epi32(
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_avx_epi32(
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
+ (__m256i)(__v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+ -1020, -1020, -1020, -1020, -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusd_avx_epi32(
+ (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m256i)(__v32qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0},
+ (__m256i)(__v32qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpbusds_avx_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_dpbusds_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusds_avx_epi32(
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4, 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpbusds_avx_epi32(
+ (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m256i)(__v32qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
__m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssd_avx_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssd_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_avx_epi32(
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_avx_epi32(
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ -2, -2, -2, -2, -2, -2, -2, -2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssd_avx_epi32(
+ (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0},
+ (__m256i)(__v16hi){1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0}),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_dpwssds_avx_epi32
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_dpwssds_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssds_avx_epi32(
+ (__m256i)(__v8si){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m256i)(__v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2, 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_dpwssds_avx_epi32(
+ (__m256i)(__v8si){2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647},
+ (__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767},
+ (__m256i)(__v16hi){32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767,32767}),
+ 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647, 2147483647));
__m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpbusd_avx_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_dpbusd_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_avx_epi32(
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_avx_epi32(
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255},
+ (__m128i)(__v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}),
+ -1020, -1020, -1020, -1020));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusd_avx_epi32(
+ (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+ (__m128i)(__v16qu){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0},
+ (__m128i)(__v16qi){1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0}),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpbusds_avx_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_dpbusds_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusds_avx_epi32(
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 4, 4, 4, 4));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpbusds_avx_epi32(
+ (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+ (__m128i)(__v16qu){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1},
+ (__m128i)(__v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}),
+ 2147483647, 2147483647, 2147483647, 2147483647));
__m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssd_avx_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssd_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_avx_epi32(
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_avx_epi32(
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767},
+ (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+ 2147352578, 2147352578, 2147352578, 2147352578));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssd_avx_epi32(
+ (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+ (__m128i)(__v8hi){1,0,1,0,1,0,1,0},
+ (__m128i)(__v8hi){1,0,1,0,1,0,1,0}),
+ -2147483647-1, -2147483647-1, -2147483647-1, -2147483647-1));
__m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_dpwssds_avx_epi32
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_dpwssds_avx_epi32(__S, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssds_avx_epi32(
+ (__m128i)(__v4si){0, 0, 0, 0},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1},
+ (__m128i)(__v8hi){1,1,1,1,1,1,1,1}),
+ 2, 2, 2, 2));
+TEST_CONSTEXPR(match_v4si(
+ _mm_dpwssds_avx_epi32(
+ (__m128i)(__v4si){2147483647, 2147483647, 2147483647, 2147483647},
+ (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767},
+ (__m128i)(__v8hi){32767,32767,32767,32767,32767,32767,32767,32767}),
+ 2147483647, 2147483647, 2147483647, 2147483647));
More information about the cfe-commits
mailing list