[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add AVX512 VPTERNLOGD/VPTERNLOGQ intrinsics to be used in constexpr (PR #158703)
Shawn K via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 1 00:55:38 PDT 2025
https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/158703
>From a1852c2b14510cb7ec76136087079f7d857d87c9 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Mon, 15 Sep 2025 10:58:34 -0700
Subject: [PATCH 01/15] [Clang] VectorExprEvaluator::VisitCallExpr /
InterpretBuiltin - add AVX512 VPTERNLOGD/VPTERNLOGQ intrinsics to be used in
constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 16 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 63 ++++-
clang/lib/AST/ExprConstant.cpp | 91 +++++++
clang/test/CodeGen/X86/avx512f-builtins.c | 138 ++++++++++
clang/test/CodeGen/X86/avx512vl-builtins.c | 277 +++++++++++++++++++++
5 files changed, 580 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e98bee28c15be..0ce9bb3be9351 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2405,28 +2405,36 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512f",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { // Constexpr added so the 512-bit pternlog builtins can be used in constant expressions
def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
def pternlogq512_maskz : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { // Constexpr added so the 128-bit pternlogd builtins can be used in constant expressions
def pternlogd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def pternlogd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { // Constexpr added so the 256-bit pternlogd builtins can be used in constant expressions
def pternlogd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def pternlogd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { // Constexpr added so the 128-bit pternlogq builtins can be used in constant expressions
def pternlogq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
def pternlogq128_maskz : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { // Constexpr added so the 256-bit pternlogq builtins can be used in constant expressions
def pternlogq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a2e97fcafdfef..e04705ac7e6ee 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2874,10 +2874,57 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
});
Dst.initializeAllElements();
-
return true;
}
+/// Constant-evaluates the masked VPTERNLOGD/VPTERNLOGQ builtins
+/// (__builtin_ia32_pternlog{d,q}{128,256,512}_mask{,z}).
+///
+/// The call operands are (A, B, C, Imm, U). For every lane whose bit in U is
+/// set, each result bit is looked up in the 8-entry truth table Imm using the
+/// index (A_bit << 2) | (B_bit << 1) | C_bit. Lanes whose mask bit is clear
+/// are zeroed when \p MaskZ is true and copied from A otherwise.
+///
+/// \returns true after every lane of the destination vector has been written.
+static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
+                                     const CallExpr *Call, bool MaskZ) {
+  assert(Call->getNumArgs() == 5);
+
+  const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
+  unsigned DstLen = VecT->getNumElements();
+  PrimType DstElemT = *S.getContext().classify(VecT->getElementType());
+
+  // Pop the operands last-to-first: U, Imm, C, B, A.
+  APSInt U = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(4)));
+  APSInt Imm = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3)));
+  const Pointer &C = S.Stk.pop<Pointer>();
+  const Pointer &B = S.Stk.pop<Pointer>();
+  const Pointer &A = S.Stk.pop<Pointer>();
+
+  // The destination is only peeked, not popped.
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  for (unsigned I = 0; I < DstLen; ++I) {
+    APSInt ALane, BLane, CLane;
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+      ALane = A.elem<T>(I).toAPSInt();
+      BLane = B.elem<T>(I).toAPSInt();
+      CLane = C.elem<T>(I).toAPSInt();
+    });
+
+    unsigned BitWidth = ALane.getBitWidth();
+    // Starts as zero, which is already the zero-masking result.
+    APInt R(BitWidth, 0);
+
+    if (U[I]) {
+      // Lane selected: evaluate the truth table one bit position at a time.
+      for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+        unsigned Idx =
+            (ALane[Bit] << 2) | (BLane[Bit] << 1) | (CLane[Bit]);
+        R.setBitVal(Bit, Imm[Idx]);
+      }
+    } else if (!MaskZ) {
+      // Merge-masking: an unselected lane keeps the value of A.
+      R = ALane;
+    }
+
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+      Dst.elem<T>(I) = static_cast<T>(APSInt(R, ALane.isUnsigned()));
+    });
+  }
+  Dst.initializeAllElements();
+
+  // The original fell off the end of this bool function (undefined behaviour).
+  return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3661,6 +3708,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ return interp__builtin_pternlog(S, OpPC, Call, false); // MaskZ=false: lanes with a clear mask bit keep A
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ return interp__builtin_pternlog(S, OpPC, Call, true); // MaskZ=true: lanes with a clear mask bit become 0
case Builtin::BI__builtin_elementwise_fshl:
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshl);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..dac6fdcd11db0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12087,6 +12087,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask: { // merge-masking form: lanes with a clear mask bit keep A
+ APValue AValue, BValue, CValue, ImmValue, UValue; // operands 0..4: A, B, C, truth-table immediate, lane mask
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ if (U[EltNum]) { // mask bit set: compute the ternary logic for this lane
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0);
+
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; // 3-bit index into the 8-entry truth table held in Imm
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ } else { // mask bit clear: pass the A lane through unchanged
+ ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned)));
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz: { // zero-masking form: lanes with a clear mask bit become 0
+ APValue AValue, BValue, CValue, ImmValue, UValue; // operands 0..4: A, B, C, truth-table immediate, lane mask
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0); // stays zero when this lane's mask bit is clear
+
+ if (U[EltNum]) { // mask bit set: compute the ternary logic for this lane
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; // 3-bit index into the 8-entry truth table held in Imm
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
case Builtin::BI__builtin_elementwise_clzg:
case Builtin::BI__builtin_elementwise_ctzg: {
APValue SourceLHS;
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 84eaad8d99e61..03cbb20ab0ed5 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6273,6 +6273,27 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240)
return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C
+ 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_ternarylogic_epi32
@@ -6280,6 +6301,30 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ (__mmask16)0x3333,
+ ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit keep A
+ 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ (__mmask16)0xCCCC,
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ (__mmask16)0x5555,
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
__m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
@@ -6287,12 +6332,57 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer
return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0x3333,
+ ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit become 0
+ 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0xCCCC,
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0x5555,
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_ternarylogic_epi64
// CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192)
return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B);
}
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C
+ 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_ternarylogic_epi64
@@ -6300,6 +6390,30 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C);
}
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_ternarylogic_epi64(
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ (__mmask8)0x33,
+ ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit keep A
+ 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0xCC,
+ ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0x55,
+ ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
__m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64
@@ -6307,6 +6421,30 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer
return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C));
}
+TEST_CONSTEXPR(match_v8di(
+ _mm512_maskz_ternarylogic_epi64(
+ (__mmask8)0x33,
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit become 0
+ 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_maskz_ternarylogic_epi64(
+ (__mmask8)0xCC,
+ ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_maskz_ternarylogic_epi64(
+ (__mmask8)0x55,
+ ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_shuffle_f32x4
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5282c7ab06dea..5a94532883d5f 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8359,6 +8359,27 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) {
// CHECK: @llvm.x86.avx512.pternlog.d.128
return _mm_ternarylogic_epi32(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_ternarylogic_epi32(
+ ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
+ ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
+ ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C
+ 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v4si(
+ _mm_ternarylogic_epi32(
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4si(
+ _mm_ternarylogic_epi32(
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0));
__m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_mask_ternarylogic_epi32
@@ -8366,6 +8387,30 @@ __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B,
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_ternarylogic_epi32(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_ternarylogic_epi32(
+ ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
+ (__mmask8)0x03,
+ ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
+ ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit keep A
+ 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_ternarylogic_epi32(
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0x0C,
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_ternarylogic_epi32(
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0x05,
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x9, 0x0, 0x9));
__m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_ternarylogic_epi32
@@ -8373,12 +8418,57 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B,
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x03,
+ ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
+ ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
+ ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit become 0
+ 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x0C,
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x05,
+ ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+ ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+ ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0));
__m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_ternarylogic_epi32
// CHECK: @llvm.x86.avx512.pternlog.d.256
return _mm256_ternarylogic_epi32(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_ternarylogic_epi32(
+ ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C
+ 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_ternarylogic_epi32
@@ -8386,6 +8476,30 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_ternarylogic_epi32(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_ternarylogic_epi32(
+ ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ (__mmask8)0x33,
+ ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit keep A
+ 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0xCC,
+ ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_ternarylogic_epi32(
+ ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0x55,
+ ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
__m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32
@@ -8393,12 +8507,57 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer
return _mm256_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_ternarylogic_epi32(
+ (__mmask8)0x33,
+ ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C; lanes with a clear mask bit become 0
+ 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_ternarylogic_epi32(
+ (__mmask8)0xCC,
+ ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8si(
+ _mm256_maskz_ternarylogic_epi32(
+ (__mmask8)0x55,
+ ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_ternarylogic_epi64
// CHECK: @llvm.x86.avx512.pternlog.q.128
return _mm_ternarylogic_epi64(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_ternarylogic_epi64(
+ ((__m128i)((__v2di){-0x1, 0x0})),
+ ((__m128i)((__v2di){0xB, 0xB})),
+ ((__m128i)((__v2di){0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C
+ 0xB, 0xC));
+TEST_CONSTEXPR(match_v2di(
+ _mm_ternarylogic_epi64(
+ ((__m128i)((__v2di){0x9, 0x9})),
+ ((__m128i)((__v2di){0x4, 0x4})),
+ ((__m128i)((__v2di){0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0xF, 0xF));
+TEST_CONSTEXPR(match_v2di(
+ _mm_ternarylogic_epi64(
+ ((__m128i)((__v2di){0x9, 0x9})),
+ ((__m128i)((__v2di){0x4, 0x4})),
+ ((__m128i)((__v2di){0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0));
__m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_mask_ternarylogic_epi64
@@ -8406,6 +8565,30 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B,
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_ternarylogic_epi64(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_ternarylogic_epi64(
+ ((__m128i)((__v2di){-0x1, 0x0})),
+ (__mmask8)0x33, // two lanes, so only mask bits 0-1 matter; both are set here
+ ((__m128i)((__v2di){0xB, 0xB})),
+ ((__m128i)((__v2di){0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C
+ 0xB, 0xC));
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_ternarylogic_epi64(
+ ((__m128i)((__v2di){0x9, 0x9})),
+ (__mmask8)0xCC, // mask bits 0-1 are clear, so both lanes keep A
+ ((__m128i)((__v2di){0x4, 0x4})),
+ ((__m128i)((__v2di){0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x9, 0x9));
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_ternarylogic_epi64(
+ ((__m128i)((__v2di){0x9, 0x9})),
+ (__mmask8)0x55, // bit 0 set, bit 1 clear: lane 0 is computed, lane 1 keeps A
+ ((__m128i)((__v2di){0x4, 0x4})),
+ ((__m128i)((__v2di){0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x9));
__m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_ternarylogic_epi64
@@ -8413,12 +8596,57 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B,
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> zeroinitializer
return _mm_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_maskz_ternarylogic_epi64(
+ (__mmask8)0x03, // both lanes selected
+ ((__m128i)((__v2di){-0x1, 0x0})),
+ ((__m128i)((__v2di){0xB, 0xB})),
+ ((__m128i)((__v2di){0xC, 0xC})),
+ (unsigned char)0xCA), // imm 0xCA: bitwise select, A ? B : C
+ 0xB, 0xC));
+TEST_CONSTEXPR(match_v2di(
+ _mm_maskz_ternarylogic_epi64(
+ (__mmask8)0x0C, // mask bits 0-1 are clear, so both lanes are zeroed
+ ((__m128i)((__v2di){0x9, 0x9})),
+ ((__m128i)((__v2di){0x4, 0x4})),
+ ((__m128i)((__v2di){0x2, 0x2})),
+ (unsigned char)0xFE), // imm 0xFE: A | B | C
+ 0x0, 0x0));
+TEST_CONSTEXPR(match_v2di(
+ _mm_maskz_ternarylogic_epi64(
+ (__mmask8)0x05, // bit 0 set, bit 1 clear: lane 0 is computed, lane 1 is zeroed
+ ((__m128i)((__v2di){0x9, 0x9})),
+ ((__m128i)((__v2di){0x4, 0x4})),
+ ((__m128i)((__v2di){0x2, 0x2})),
+ (unsigned char)0x80), // imm 0x80: A & B & C
+ 0x0, 0x0));
__m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_ternarylogic_epi64
// CHECK: @llvm.x86.avx512.pternlog.q.256
return _mm256_ternarylogic_epi64(__A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_ternarylogic_epi64(
+ ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
+ ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA),
+ 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_ternarylogic_epi64(
+ ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE),
+ 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_ternarylogic_epi64(
+ ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80),
+ 0x0, 0x0, 0x0, 0x0));
__m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_ternarylogic_epi64
@@ -8426,6 +8654,30 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_ternarylogic_epi64(__A, __U, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_ternarylogic_epi64(
+ ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
+ (__mmask8)0x33,
+ ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA),
+ 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_ternarylogic_epi64(
+ ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0xCC,
+ ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE),
+ 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_ternarylogic_epi64(
+ ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+ (__mmask8)0x55,
+ ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80),
+ 0x0, 0x9, 0x0, 0x9));
__m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64
@@ -8433,6 +8685,31 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> zeroinitializer
return _mm256_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_maskz_ternarylogic_epi64(
+ (__mmask8)0x33,
+ ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
+ ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
+ ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA),
+ 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_maskz_ternarylogic_epi64(
+ (__mmask8)0xCC,
+ ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE),
+ 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4di(
+ _mm256_maskz_ternarylogic_epi64(
+ (__mmask8)0x55,
+ ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+ ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+ ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80),
+ 0x0, 0x0, 0x0, 0x0));
+
__m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_shuffle_f32x4
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
>From 6936047dc512f64095167de6d7d3673c7c31362e Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:05:54 -0700
Subject: [PATCH 02/15] Apply suggestion from @tbaederr
Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index e04705ac7e6ee..31846e0d3d0dd 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2893,7 +2893,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const Pointer &Dst = S.Stk.peek<Pointer>();
- for (unsigned I = 0; I < DstLen; ++I) {
+ for (unsigned I = 0; I != DstLen; ++I) {
APSInt a, b, c;
INT_TYPE_SWITCH(DstElemT, {
a = A.elem<T>(I).toAPSInt();
>From f9160faf510c98c926a8fd72d3a627f205d97c46 Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:06:30 -0700
Subject: [PATCH 03/15] Apply suggestion from @tbaederr
Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 31846e0d3d0dd..748f8891d9205 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2906,7 +2906,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
bool DstUnsigned = a.isUnsigned();
if (U[I]) {
- for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]);
R.setBitVal(Bit, Imm[Idx]);
}
>From c97d923a78a3ffee6b531218548e3238b8d8c433 Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:09:14 -0700
Subject: [PATCH 04/15] Apply suggestion from @tbaederr
Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 748f8891d9205..c777f44f5e071 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3714,7 +3714,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_pternlogq128_mask:
case X86::BI__builtin_ia32_pternlogq256_mask:
case X86::BI__builtin_ia32_pternlogq512_mask:
- return interp__builtin_pternlog(S, OpPC, Call, false);
+ return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/false);
case X86::BI__builtin_ia32_pternlogd128_maskz:
case X86::BI__builtin_ia32_pternlogd256_maskz:
case X86::BI__builtin_ia32_pternlogd512_maskz:
>From 33b638d17c6593aa7f0aaaeae9620617375f6b25 Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:09:42 -0700
Subject: [PATCH 05/15] Apply suggestion from @tbaederr
Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c777f44f5e071..3ca0531152758 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3721,7 +3721,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_pternlogq128_maskz:
case X86::BI__builtin_ia32_pternlogq256_maskz:
case X86::BI__builtin_ia32_pternlogq512_maskz:
- return interp__builtin_pternlog(S, OpPC, Call, true);
+ return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/true);
case Builtin::BI__builtin_elementwise_fshl:
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshl);
>From dd305dcec3b1318e142be4de8df8e5d406702aa9 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:47:53 -0700
Subject: [PATCH 06/15] Apply feedback: Use PascalCase
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 3ca0531152758..4c8abd7172109 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2894,32 +2894,32 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const Pointer &Dst = S.Stk.peek<Pointer>();
for (unsigned I = 0; I != DstLen; ++I) {
- APSInt a, b, c;
+ APSInt ALane, BLane, CLane;
INT_TYPE_SWITCH(DstElemT, {
- a = A.elem<T>(I).toAPSInt();
- b = B.elem<T>(I).toAPSInt();
- c = C.elem<T>(I).toAPSInt();
+ ALane = A.elem<T>(I).toAPSInt();
+ BLane = B.elem<T>(I).toAPSInt();
+ CLane = C.elem<T>(I).toAPSInt();
});
- unsigned BitWidth = a.getBitWidth();
- APInt R(BitWidth, 0);
- bool DstUnsigned = a.isUnsigned();
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt RLane(BitWidth, 0);
+ bool DstUnsigned = ALane.isUnsigned();
if (U[I]) {
for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
- unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]);
- R.setBitVal(Bit, Imm[Idx]);
+ unsigned Idx = (ALane[Bit] << 2) | (BLane[Bit] << 1) | (CLane[Bit]);
+ RLane.setBitVal(Bit, Imm[Idx]);
}
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
- Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned));
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
});
} else if (MaskZ) {
- INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
- Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned));
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, { /* Zeroes lane */
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
});
} else {
INT_TYPE_SWITCH_NO_BOOL(DstElemT,
- { Dst.elem<T>(I) = static_cast<T>(a); });
+ { Dst.elem<T>(I) = static_cast<T>(ALane); });
}
}
Dst.initializeAllElements();
>From 916079d5d689f975b41dc9e3cb526b59bf4ea8ba Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:53:17 -0700
Subject: [PATCH 07/15] Clang-format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 4c8abd7172109..936dd546e7442 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2914,7 +2914,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
});
} else if (MaskZ) {
- INT_TYPE_SWITCH_NO_BOOL(DstElemT, { /* Zeroes lane */
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, { // Zeroes lane
Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
});
} else {
>From c7222db83eb70319de37dbfe1998c3d412b82bde Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 17 Sep 2025 23:26:28 -0700
Subject: [PATCH 08/15] Apply feedback: Rebase and refactor lines
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 936dd546e7442..46cace519c3f7 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2883,10 +2883,10 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
unsigned DstLen = VecT->getNumElements();
- PrimType DstElemT = *S.getContext().classify(VecT->getElementType());
+ const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
- APSInt U = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(4)));
- APSInt Imm = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3)));
+ APSInt U = popToAPSInt(S, Call->getArg(4));
+ APSInt Imm = popToAPSInt(S, Call->getArg(3));
const Pointer &C = S.Stk.pop<Pointer>();
const Pointer &B = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
>From 1d13362d91d3bc5b33da737441ab3994515666d8 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Fri, 19 Sep 2025 14:41:05 -0700
Subject: [PATCH 09/15] Apply feedback: Rebase with main
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 46cace519c3f7..f51e4a3d18c4f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2923,6 +2923,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
}
}
Dst.initializeAllElements();
+ return true;
}
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
>From fbb9ced09422389965595c93a57eaf14cd01f16f Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Fri, 19 Sep 2025 14:47:39 -0700
Subject: [PATCH 10/15] Apply feedback: Rebase with main
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index f51e4a3d18c4f..5678339597a83 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2900,7 +2900,6 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
BLane = B.elem<T>(I).toAPSInt();
CLane = C.elem<T>(I).toAPSInt();
});
-
unsigned BitWidth = ALane.getBitWidth();
APInt RLane(BitWidth, 0);
bool DstUnsigned = ALane.isUnsigned();
>From 97eb8465677e5d6753af18051d42595415e17699 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Mon, 22 Sep 2025 21:16:42 -0700
Subject: [PATCH 11/15] Upload failing testcase for now
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 10 ++-
clang/test/CodeGen/X86/avx512f-builtins.c | 89 +++++++++++++----------
2 files changed, 58 insertions(+), 41 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5678339597a83..d540f303d9134 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2882,7 +2882,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
assert(Call->getNumArgs() == 5);
const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
- unsigned DstLen = VecT->getNumElements();
+ const unsigned DstLen = VecT->getNumElements();
const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
APSInt U = popToAPSInt(S, Call->getArg(4));
@@ -2895,7 +2895,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
for (unsigned I = 0; I != DstLen; ++I) {
APSInt ALane, BLane, CLane;
- INT_TYPE_SWITCH(DstElemT, {
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
ALane = A.elem<T>(I).toAPSInt();
BLane = B.elem<T>(I).toAPSInt();
CLane = C.elem<T>(I).toAPSInt();
@@ -2906,7 +2906,11 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
if (U[I]) {
for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
- unsigned Idx = (ALane[Bit] << 2) | (BLane[Bit] << 1) | (CLane[Bit]);
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
RLane.setBitVal(Bit, Imm[Idx]);
}
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 03cbb20ab0ed5..5b13807014520 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6278,21 +6278,21 @@ TEST_CONSTEXPR(match_v16si(
((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
+ (unsigned char)0xCA), // A ? B : C
0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
TEST_CONSTEXPR(match_v16si(
_mm512_ternarylogic_epi32(
((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
+ (unsigned char)0xFE), // A | B | C
0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
TEST_CONSTEXPR(match_v16si(
_mm512_ternarylogic_epi32(
((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
+ (unsigned char)0x80), // A & B & C
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
@@ -6303,28 +6303,37 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _
}
TEST_CONSTEXPR(match_v16si(
_mm512_mask_ternarylogic_epi32(
- ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- (__mmask16)0x3333,
- ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+ ((__m512i)((__v16si){0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0,
+ 0x5, 0x0, 0x6, 0x0, 0x7, 0x0, 0x8, 0x0})),
+ (__mmask16)0xA55A,
+ ((__m512i)((__v16si){0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19})),
+ ((__m512i)((__v16si){0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+ 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10})),
+ (unsigned char)0xCA), // A ? B : C
+ 0x1, 0x2, 0x2, 0x4, 0x6, 0x0, 0x3, 0x0, 0x8, 0x0, 0xD, 0x0, 0x7, 0xE, 0x8, 0x10));
TEST_CONSTEXPR(match_v16si(
_mm512_mask_ternarylogic_epi32(
- ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- (__mmask16)0xCCCC,
- ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+ ((__m512i)((__v16si){0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF})),
+ (__mmask16)0x0F0F,
+ ((__m512i)((__v16si){0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8,
+ 0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8})),
+ ((__m512i)((__v16si){0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80,
+ 0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80})),
+ (unsigned char)0xFE), // A | B | C
+ 0x11, 0x23, 0x46, 0x8B, 0x4, 0x5, 0x6, 0x7, 0x19, 0x2B, 0x4E, 0x8B, 0xC, 0xD, 0xE, 0xF));
TEST_CONSTEXPR(match_v16si(
_mm512_mask_ternarylogic_epi32(
- ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- (__mmask16)0x5555,
- ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
+ ((__m512i)((__v16si){0xF, 0x7, 0x3, 0x1, 0xF, 0x7, 0x3, 0x1,
+ 0xFF, 0xF, 0xF0, 0xAA, 0x55, 0xCC, 0x33, 0xFF})),
+ (__mmask16)0xAAAA,
+ ((__m512i)((__v16si){0xE, 0x7, 0x2, 0x1, 0xF, 0x0, 0x3, 0x0,
+ 0xF, 0xF0, 0xFF, 0x55, 0x55, 0x33, 0x33, 0xF})),
+ ((__m512i)((__v16si){0xD, 0x7, 0x0, 0x1, 0xF, 0x7, 0x0, 0x1,
+ 0xF0, 0xF, 0xF, 0xFF, 0xF, 0xCC, 0x33, 0xF0})),
+ (unsigned char)0x80), // A & B & C
+ 0xF, 0x7, 0x3, 0x1, 0xF, 0x0, 0x3, 0x0, 0xFF, 0x0, 0xF0, 0x0, 0x55, 0x0, 0x33, 0x0));
__m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
@@ -6338,7 +6347,7 @@ TEST_CONSTEXPR(match_v16si(
((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
+ (unsigned char)0xCA), // A ? B : C
0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
TEST_CONSTEXPR(match_v16si(
_mm512_maskz_ternarylogic_epi32(
@@ -6346,7 +6355,7 @@ TEST_CONSTEXPR(match_v16si(
((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
+ (unsigned char)0xFE), // A | B | C
0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
TEST_CONSTEXPR(match_v16si(
_mm512_maskz_ternarylogic_epi32(
@@ -6354,7 +6363,7 @@ TEST_CONSTEXPR(match_v16si(
((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
+ (unsigned char)0x80), // A & B & C
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
@@ -6364,25 +6373,29 @@ __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
}
TEST_CONSTEXPR(match_v8di(
_mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888})),
+ ((__m512i)((__v8di){0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD, 0xEEEE, 0xFFFF, 0x1111, 0x2222})),
((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+ (unsigned char)0xD8), // C ? B : A
+ 0xAAAA, 0x2222, 0xCCCC, 0x4444, 0xEEEE, 0x6666, 0x1111, 0x8888));
TEST_CONSTEXPR(match_v8di(
_mm512_ternarylogic_epi64(
- ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, 0xF0F0, 0xFF, -0x5555555555555556, 0x5555555555555555})),
+ ((__m512i)((__v8di){0x1234, 0xFFFF, 0xFF, 0xF0F, 0x3333, 0xFF00, -0x5555555555555556, -0x0F0F0F0F0F0F0F10})),
+ ((__m512i)((__v8di){0xFFFF, 0x1234, 0xF0F, 0xFF00, 0xF0F0, 0x3333, 0x5555555555555555, 0x0F0F0F0F0F0F0F0})),
+ (unsigned char)0x8F), // ~A | (B & C)
+ 0x1234, -0x1, 0xF, -0x1, -0xC0C1, -0x100, 0x5555555555555555, -0x5505050505050506));
TEST_CONSTEXPR(match_v8di(
_mm512_ternarylogic_epi64(
- ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+ ((__m512i)((__v8di){0x7FFFFFFFFFFFFFFF, 0x0, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F,
+ 0x123456789ABCDEF0, 0x3333333333333333, 0x5555555555555555, 0x0123456789ABCDEF})),
+ ((__m512i)((__v8di){0x1111111111111111, 0x2222222222222222, 0xFFFFFFFF, -0x100000000,
+ 0x0, -0x3333333333333334, -0x0F0F0F0F0F0F0F10, -0x123456789ABCDF0})),
+ ((__m512i)((__v8di){0x2222222222222222, 0x1111111111111111, -0x1000000000000, 0xFFFFFFFF,
+ -0x1, 0x0, 0x0F0F0F0F0F0F0F0F, 0x0})),
+ (unsigned char)0xE0), // A & (B | C)
+ 0x3333333333333333, 0x0, 0x00FF000000FF00FF, 0x0F0F0F0F0F0F0F0F,
+ 0x123456789ABCDEF0, 0x0, 0x5555555555555555, 0x0));
__m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_ternarylogic_epi64
>From c7c07e338eef7519607793422804deed7dec750a Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 24 Sep 2025 08:14:40 -0700
Subject: [PATCH 12/15] Save debug changes
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d540f303d9134..13d9b01bd8e0c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2884,9 +2884,10 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
const unsigned DstLen = VecT->getNumElements();
const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
+ const bool DstUnsigned = VecT->isUnsignedIntegerOrEnumerationType();
- APSInt U = popToAPSInt(S, Call->getArg(4));
- APSInt Imm = popToAPSInt(S, Call->getArg(3));
+ APInt U = popToAPSInt(S, Call->getArg(4));
+ APInt Imm = popToAPSInt(S, Call->getArg(3));
const Pointer &C = S.Stk.pop<Pointer>();
const Pointer &B = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
@@ -2894,7 +2895,9 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const Pointer &Dst = S.Stk.peek<Pointer>();
for (unsigned I = 0; I != DstLen; ++I) {
- APSInt ALane, BLane, CLane;
+ APInt ALane;
+ APInt BLane;
+ APInt CLane;
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
ALane = A.elem<T>(I).toAPSInt();
BLane = B.elem<T>(I).toAPSInt();
@@ -2902,7 +2905,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
});
unsigned BitWidth = ALane.getBitWidth();
APInt RLane(BitWidth, 0);
- bool DstUnsigned = ALane.isUnsigned();
+ // bool DstUnsigned = ALane.isUnsigned();
if (U[I]) {
for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
@@ -2922,7 +2925,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
});
} else {
INT_TYPE_SWITCH_NO_BOOL(DstElemT,
- { Dst.elem<T>(I) = static_cast<T>(ALane); });
+ { Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned)); });
}
}
Dst.initializeAllElements();
>From b822c2d80f1cf4de7089ff4155bd19da0be08baa Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 25 Sep 2025 18:52:33 -0700
Subject: [PATCH 13/15] Save debug changes
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 13d9b01bd8e0c..ee298a92343fa 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -20,6 +20,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SipHash.h"
+#include <iostream>
+#include "llvm/Support/Debug.h"
+#define DEBUG_TYPE "interp-builtin"
+
namespace clang {
namespace interp {
@@ -2881,6 +2885,9 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool MaskZ) {
assert(Call->getNumArgs() == 5);
+ // LLVM_DEBUG(llvm::dbgs() << "Debug\n");
+ std::cout << "Debug here\n";
+
const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
const unsigned DstLen = VecT->getNumElements();
const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
>From 79e065f4d6f03e2d238e3d4119e2a8c02355200e Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sat, 27 Sep 2025 21:30:33 -0700
Subject: [PATCH 14/15] Save debug changes
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 23 +++++++-------
clang/test/CodeGen/X86/avx512f-builtins.c | 5 +--
clang/test/CodeGen/X86/avx512vl-builtins.c | 36 +++++++++++++++++++---
3 files changed, 44 insertions(+), 20 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ee298a92343fa..c5cfb85ca7439 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -20,10 +20,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SipHash.h"
-#include <iostream>
-#include "llvm/Support/Debug.h"
-#define DEBUG_TYPE "interp-builtin"
-
namespace clang {
namespace interp {
@@ -2885,16 +2881,13 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool MaskZ) {
assert(Call->getNumArgs() == 5);
- // LLVM_DEBUG(llvm::dbgs() << "Debug\n");
- std::cout << "Debug here\n";
-
const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
const unsigned DstLen = VecT->getNumElements();
const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
- const bool DstUnsigned = VecT->isUnsignedIntegerOrEnumerationType();
+ const bool DstUnsigned = VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
- APInt U = popToAPSInt(S, Call->getArg(4));
- APInt Imm = popToAPSInt(S, Call->getArg(3));
+ const APInt U = popToAPSInt(S, Call->getArg(4));
+ const APInt Imm = popToAPSInt(S, Call->getArg(3));
const Pointer &C = S.Stk.pop<Pointer>();
const Pointer &B = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
@@ -2910,9 +2903,17 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
BLane = B.elem<T>(I).toAPSInt();
CLane = C.elem<T>(I).toAPSInt();
});
- unsigned BitWidth = ALane.getBitWidth();
+ const unsigned BitWidth = ALane.getBitWidth();
APInt RLane(BitWidth, 0);
// bool DstUnsigned = ALane.isUnsigned();
+
+ #define DEBUG_TYPE "ptern"
+ LLVM_DEBUG({
+ ALane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+ BLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+ CLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+ RLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+ });
if (U[I]) {
for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 5b13807014520..7233999787218 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3,10 +3,7 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
#include "builtin_test_helpers.h"
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5a94532883d5f..d786b5fb8ddbf 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -1,8 +1,7 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
#include "builtin_test_helpers.h"
@@ -8393,7 +8392,7 @@ TEST_CONSTEXPR(match_v4si(
(__mmask8)0x03,
((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
- (unsigned char)0xCA),
+ (unsigned char)0xCA), // A ? B : C
0xB, 0xC, -0x1, 0x0));
TEST_CONSTEXPR(match_v4si(
_mm_mask_ternarylogic_epi32(
@@ -8401,7 +8400,7 @@ TEST_CONSTEXPR(match_v4si(
(__mmask8)0x0C,
((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
+ (unsigned char)0xFE), // A | B | C
0x9, 0x9, 0xF, 0xF));
TEST_CONSTEXPR(match_v4si(
_mm_mask_ternarylogic_epi32(
@@ -8409,7 +8408,7 @@ TEST_CONSTEXPR(match_v4si(
(__mmask8)0x05,
((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
+ (unsigned char)0x80), // A & B & C
0x0, 0x9, 0x0, 0x9));
__m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
@@ -8418,6 +8417,33 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B,
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4);
}
+// B ? A : C imm = 0xE2 (Idx = (A<<2)|(B<<1)|C per VPTERNLOG)
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x0B,
+ ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})), // A
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})), // B
+ ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})), // C
+ (unsigned char)0xE2),
+ (int)0xDEADBEEF, (int)0xFFFFFFFF, 0, 0x2468ACE0));
+ // ~(A & B) | ~(B & C) imm = 0x7F
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x0C,
+ ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})), // A
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})), // B
+ ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})), // C
+ (unsigned char)0x7F),
+ 0, 0, (int)0xFFFFFFFF, (int)0xAAAAAAAA));
+ // ~A | ~B | C imm = 0xBF
+TEST_CONSTEXPR(match_v4si(
+ _mm_maskz_ternarylogic_epi32(
+ (__mmask8)0x05,
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})), // A
+ ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})), // B
+ ((__m128i)((__v4si){0, 0, 0x0000000F, 0})), // C
+ (unsigned char)0xBF),
+ (int)0xFFFFFFFF, 0, (int)0xFFFFA98F, 0));
TEST_CONSTEXPR(match_v4si(
_mm_maskz_ternarylogic_epi32(
(__mmask8)0x03,
>From e55bd08facad7349bc91ecf954417216e75054c1 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 30 Sep 2025 23:18:09 -0700
Subject: [PATCH 15/15] Add better testcases and try to debug
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 74 +++-
clang/test/CodeGen/X86/avx512f-builtins.c | 364 ++++++++++++-----
clang/test/CodeGen/X86/avx512vl-builtins.c | 430 +++++++++++----------
3 files changed, 547 insertions(+), 321 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c5cfb85ca7439..92ceed8a71fde 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2877,14 +2877,15 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
- const CallExpr *Call, bool MaskZ) {
+static bool interp__builtin_ia32_pternlog_maskz(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
assert(Call->getNumArgs() == 5);
const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
const unsigned DstLen = VecT->getNumElements();
const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
- const bool DstUnsigned = VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
+ const bool DstUnsigned =
+ VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
const APInt U = popToAPSInt(S, Call->getArg(4));
const APInt Imm = popToAPSInt(S, Call->getArg(3));
@@ -2894,7 +2895,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
const Pointer &Dst = S.Stk.peek<Pointer>();
- for (unsigned I = 0; I != DstLen; ++I) {
+ for (unsigned I = 0; I < DstLen; ++I) {
APInt ALane;
APInt BLane;
APInt CLane;
@@ -2905,18 +2906,57 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
});
const unsigned BitWidth = ALane.getBitWidth();
APInt RLane(BitWidth, 0);
- // bool DstUnsigned = ALane.isUnsigned();
-
- #define DEBUG_TYPE "ptern"
- LLVM_DEBUG({
- ALane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
- BLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
- CLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
- RLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+
+ if (U[I]) {
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
+ RLane.setBitVal(Bit, Imm[Idx]);
+ }
+ }
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
});
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pternlog_mask(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 5);
+
+ const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
+ const unsigned DstLen = VecT->getNumElements();
+ const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
+ const bool DstUnsigned =
+ VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+ const APInt U = popToAPSInt(S, Call->getArg(4));
+ const APInt Imm = popToAPSInt(S, Call->getArg(3));
+ const Pointer &C = S.Stk.pop<Pointer>();
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ for (unsigned I = 0; I < DstLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+ APInt CLane;
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+ ALane = A.elem<T>(I).toAPSInt();
+ BLane = B.elem<T>(I).toAPSInt();
+ CLane = C.elem<T>(I).toAPSInt();
+ });
+ const unsigned BitWidth = ALane.getBitWidth();
+ APInt RLane(BitWidth, 0);
if (U[I]) {
- for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
unsigned ABit = ALane[Bit];
unsigned BBit = BLane[Bit];
unsigned CBit = CLane[Bit];
@@ -2927,10 +2967,6 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
});
- } else if (MaskZ) {
- INT_TYPE_SWITCH_NO_BOOL(DstElemT, { // Zeroes lane
- Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
- });
} else {
INT_TYPE_SWITCH_NO_BOOL(DstElemT,
{ Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned)); });
@@ -3729,14 +3765,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_pternlogq128_mask:
case X86::BI__builtin_ia32_pternlogq256_mask:
case X86::BI__builtin_ia32_pternlogq512_mask:
- return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/false);
+ return interp__builtin_ia32_pternlog_mask(S, OpPC, Call);
case X86::BI__builtin_ia32_pternlogd128_maskz:
case X86::BI__builtin_ia32_pternlogd256_maskz:
case X86::BI__builtin_ia32_pternlogd512_maskz:
case X86::BI__builtin_ia32_pternlogq128_maskz:
case X86::BI__builtin_ia32_pternlogq256_maskz:
case X86::BI__builtin_ia32_pternlogq512_maskz:
- return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/true);
+ return interp__builtin_ia32_pternlog_maskz(S, OpPC, Call);
case Builtin::BI__builtin_elementwise_fshl:
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshl);
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 7233999787218..fd446c1f22003 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3,7 +3,10 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
-
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
#include <immintrin.h>
#include "builtin_test_helpers.h"
@@ -6272,25 +6275,76 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
}
TEST_CONSTEXPR(match_v16si(
_mm512_ternarylogic_epi32(
- ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA), // A ? B : C
- 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+ ((__m512i)((__v16si){
+ 0x6AA79987, (int)0xBB91433A, 0x029A7245, (int)0xD1F6F86C,
+ (int)0xD340BBCD, (int)0xCD8778E7, 0x4C73A942, (int)0xDAEA58BA,
+ 0x5E503A67, (int)0xEE897110, 0x3193CA54, 0x452EC40A,
+ (int)0x90E5E945, 0x6FACAA50, 0x29645F8B, 0x5F811CB9
+ })),
+ ((__m512i)((__v16si){
+ 0x1FCFF454, (int)0xDFC9E3B1, 0x6ED4E94B, 0x42D6CB5C,
+ (int)0x8FE46024, (int)0xA091250E, 0x2CA1C789, (int)0x9C9CEA0C,
+ (int)0x8D9FE5B9, 0x2FD2B7A4, 0x5ADAD121, (int)0xBCF74D7A,
+ (int)0xF543BBCF, (int)0xBB9D58E4, 0x175F0CD2, (int)0x87F26AEE
+ })),
+ ((__m512i)((__v16si){
+ (int)0xFA882692, (int)0xBC428D42, 0x6980A81F, (int)0x95C5FB98,
+ (int)0x8101E89A, 0x2AA4857E, 0x25ECE845, 0x34A9AF41,
+ (int)0xB80E3B0D, 0x13ED748B, 0x30A1F6D5, (int)0xD64A3CE0,
+ 0x57708107, 0x527122DC, 0x06057C82, 0x7576714A
+ })),
+ (unsigned char)0x11), // ~B & ~C
+ 0x00300929, 0x0034100C, (int)0x902B16A0, 0x28280423,
+ 0x701A1741, 0x554A5A81, (int)0xD2121032, 0x434210B2,
+ 0x42600042, (int)0xC0000850, (int)0x8504080A, 0x01008205,
+ 0x088C4430, 0x04028503, (int)0xE8A0832D, 0x08098411));
TEST_CONSTEXPR(match_v16si(
_mm512_ternarylogic_epi32(
- ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE), // A | B | C
- 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+ ((__m512i)((__v16si){
+ (int)0xA3B1799D, (int)0x46685257, (int)0x392456DE, (int)0xBC8960A9,
+ (int)0x6C031199, (int)0x07A0CA6E, (int)0x37F8A88B, (int)0x8B8148F6,
+ (int)0x386ECBE0, (int)0x96DA1DAC, (int)0xCE4A2BBD, (int)0xB2B9437A,
+ (int)0x571AA876, (int)0x27CD8130, (int)0x562B0F79, (int)0x17BE3111
+ })),
+ ((__m512i)((__v16si){
+ (int)0x18C26797, (int)0xD8F56413, (int)0x9A8DCA03, (int)0xCE9FF57F,
+ (int)0xBACFB3D0, (int)0x89463E85, (int)0x60E7A113, (int)0x8D5288F1,
+ (int)0xDC98D2C1, (int)0x93CD59BF, (int)0xB45ED1F0, (int)0x19DB3AD0,
+ (int)0x47294739, (int)0x5D65A441, (int)0x5EC42E08, (int)0xA5E5A5AB
+ })),
+ ((__m512i)((__v16si){
+ (int)0xBAA80DD4, (int)0x29D4BEEF, (int)0x6123FDF7, (int)0x8E944239,
+ (int)0xAF42E12F, (int)0xC6A7EE39, (int)0x50C187FC, (int)0x448AAA9E,
+ (int)0x508EBAD7, (int)0xA7CAD415, (int)0x757750A9, (int)0x43CF2FDE,
+ (int)0x95A76D79, (int)0x663F1C97, (int)0xFF5E9FF0, (int)0x827050A8
+ })),
+ (unsigned char)0x38), // (A & ~B) | (~A & B & C)
+ (int)0xBB311C08, (int)0x0E9C3644, (int)0x21219CDD, (int)0x32140090,
+ (int)0xC640A009, (int)0x86A6E46B, (int)0x57190998, (int)0x0683C006,
+ (int)0x60E61921, (int)0x05124411, (int)0x7A147A0D, (int)0xA36269AA,
+ (int)0x1033ED4F, (int)0x62A80531, (int)0x086F0171, (int)0x925A10B8));
TEST_CONSTEXPR(match_v16si(
_mm512_ternarylogic_epi32(
- ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80), // A & B & C
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+ ((__m512i)((__v16si){
+ (int)0x3193CA54, (int)0x90E5E945, (int)0x29645F8B, (int)0x6ED4E94B,
+ (int)0x8D9FE5B9, (int)0x8101E89A, (int)0x25ECE845, (int)0xB80E3B0D,
+ (int)0x57708107, (int)0x06057C82, (int)0x56EAA301, (int)0xBE99854A,
+ (int)0x00E266D0, (int)0xDEEA959E, (int)0x2DCAABD5, (int)0x6A1ECCDA})),
+ ((__m512i)((__v16si){
+ (int)0x93FD7234, (int)0xBC90A6EC, (int)0xD3285151, (int)0xCE9FB6A8,
+ (int)0x3B788B66, (int)0xDF8960AD, (int)0x2F927291, (int)0x96AF0DEA,
+ (int)0xF56AE7EA, (int)0x2A04F77A, (int)0xD50B612B, (int)0x3AA725CB,
+ (int)0x8A04F74F, (int)0x282FE557, (int)0x52E1FBB0, (int)0x0CA02F4D})),
+ ((__m512i)((__v16si){
+ (int)0xB6307BAD, (int)0x141CB03E, (int)0xEBAA7701, (int)0xC9F0B072,
+ (int)0x5E2503DD, (int)0xC2E1DAC4, (int)0x0FC01B11, (int)0xA0485922,
+ (int)0x339BB47E, (int)0xB2D4F32A, (int)0x8E7AE9AF, (int)0x147DE9B0,
+ (int)0xF79FCAA0, (int)0x3B0B6398, (int)0x29DDF4C7, (int)0x49CDBEC7})),
+ (unsigned char)0xC3), // ~(A ^ B)
+ (int)0x5D91479F, (int)0xD38AB056, (int)0x05B3F125, (int)0x5FB4A01C,
+ (int)0x49189120, (int)0xA17777C8, (int)0xF581652B, (int)0xD15EC918,
+ (int)0x5DE59912, (int)0xD3FE7407, (int)0x7C1E3DD5, (int)0x7BC15F7E,
+ (int)0x75196E60, (int)0x093A8F36, (int)0x80D4AF9A, (int)0x99411C68));
__m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_ternarylogic_epi32
@@ -6300,37 +6354,61 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _
}
TEST_CONSTEXPR(match_v16si(
_mm512_mask_ternarylogic_epi32(
- ((__m512i)((__v16si){0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0,
- 0x5, 0x0, 0x6, 0x0, 0x7, 0x0, 0x8, 0x0})),
- (__mmask16)0xA55A,
- ((__m512i)((__v16si){0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10, 0x11,
- 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19})),
- ((__m512i)((__v16si){0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
- 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10})),
- (unsigned char)0xCA), // A ? B : C
- 0x1, 0x2, 0x2, 0x4, 0x6, 0x0, 0x3, 0x0, 0x8, 0x0, 0xD, 0x0, 0x7, 0xE, 0x8, 0x10));
+ _mm512_setr_epi32(
+ (int)0xFFFFFFFF, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x12345678, (int)0x87654321,
+ (int)0xAAAAAAAA, 0x55555555, (int)0xF00DBEEF, (int)0xBAD2FEAF, 0x0112358D, (int)0xDEADF00D,
+ (int)0x8BADF00D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAAD),
+ (__mmask16)0x9D71,
+ _mm512_setr_epi32(
+ 0x11111111, 0x22222222, 0x33333333, 0x44444444, (int)0xABCDEF01, (int)0xFEDCBA98,
+ (int)0xCCCCCCCC, 0x33333333, 0x1337BEEF, 0x01010101, (int)0x81321345, (int)0xBAADF00D,
+ 0x1BADB002, 0x5EE7C0DE, 0x12345678, 0x55555555),
+ _mm512_setr_epi32(
+ (int)0xF0F0F0F0, 0x0F0F0F0F, 0x1234ABCD, (int)0x9876FEDC, 0x00FF00FF, (int)0xFF00FF00,
+ (int)0xFF0000FF, 0x00FFFF00, 0x50D4CAFE, (int)0x8BADF00D, (int)0xABCDEFFF, (int)0xFEEDF00D,
+ (int)0xBEEFCAFE, (int)0xDEADC0DE, (int)0x1BADBEEF, 0x33333333),
+ (unsigned char)0xB1), // op: (~B & (A | ~C)) | (B & A & C)
+ (int)0xFEFEFEFE, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x54341078, (int)0x87234367,
+ (int)0xAA3333AA, 0x55555555, (int)0xFC0C8BEE, (int)0xBAD2FEAF, 0x5500258D, (int)0xDFBFFFFF,
+ (int)0xCABDC50D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAA9));
TEST_CONSTEXPR(match_v16si(
_mm512_mask_ternarylogic_epi32(
- ((__m512i)((__v16si){0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF})),
- (__mmask16)0x0F0F,
- ((__m512i)((__v16si){0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8,
- 0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8})),
- ((__m512i)((__v16si){0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80,
- 0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80})),
- (unsigned char)0xFE), // A | B | C
- 0x11, 0x23, 0x46, 0x8B, 0x4, 0x5, 0x6, 0x7, 0x19, 0x2B, 0x4E, 0x8B, 0xC, 0xD, 0xE, 0xF));
+ _mm512_setr_epi32(
+ 0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFF00FF00, (int)0xAAAAAAAA, 0x33333333,
+ (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x7FFFFFFF, (int)0xDEADBEEF,
+ (int)0xCAFEBABE, 0x01234567, (int)0xABCDEF01, (int)0xFEDCBA98),
+ (__mmask16)0x3C3C,
+ _mm512_setr_epi32(
+ 0x1111EEEE, 0x2222DDDD, (int)0x80808080, 0x00FF00FF, 0x55555555, 0x00000000,
+ (int)0xCCCCCCCC, 0x33333333, 0x11111111, 0x22222222, (int)0x80000000, 0x12345678,
+ 0x11223344, (int)0xFEDCBA98, (int)0xBAD0BAD0, (int)0xBEEFCAFE),
+ _mm512_setr_epi32(
+ 0x12345678, (int)0x87654321, 0x7F7F7F7F, (int)0xFEDCBA98, (int)0xCCCCCCCC, (int)0xFFFFFFFF,
+ 0x11111111, 0x22222222, (int)0xABABABAB, (int)0xCDCDCDCD, 0x00000001, (int)0xFACEB00C,
+ 0x55667788, (int)0xABCDEF01, 0x12345678, (int)0xDEADBEEF),
+ (unsigned char)0xE8), // op: (A & B) | (B & C) | (C & A) (Majority)
+ 0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFEDCBA98, (int)0xCCCCCCCC, 0x33333333,
+ (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x00000001, (int)0xDAACB66C,
+ 0x5166338C, (int)0xABCDEF01, (int)0xABCDEF01, (int)0xFEDCBA98));
TEST_CONSTEXPR(match_v16si(
_mm512_mask_ternarylogic_epi32(
- ((__m512i)((__v16si){0xF, 0x7, 0x3, 0x1, 0xF, 0x7, 0x3, 0x1,
- 0xFF, 0xF, 0xF0, 0xAA, 0x55, 0xCC, 0x33, 0xFF})),
- (__mmask16)0xAAAA,
- ((__m512i)((__v16si){0xE, 0x7, 0x2, 0x1, 0xF, 0x0, 0x3, 0x0,
- 0xF, 0xF0, 0xFF, 0x55, 0x55, 0x33, 0x33, 0xF})),
- ((__m512i)((__v16si){0xD, 0x7, 0x0, 0x1, 0xF, 0x7, 0x0, 0x1,
- 0xF0, 0xF, 0xF, 0xFF, 0xF, 0xCC, 0x33, 0xF0})),
- (unsigned char)0x80), // A & B & C
- 0xF, 0x7, 0x3, 0x1, 0xF, 0x0, 0x3, 0x0, 0xFF, 0x0, 0xF0, 0x0, 0x55, 0x0, 0x33, 0x0));
+ _mm512_setr_epi32(
+ (int)0xDEADBEEF, 0x01234567, (int)0xAAAAAAAA, 0x0F0F0F0F, (int)0xBAADF00D, 0x00000001,
+ (int)0x80000000, 0x7FFFFFFF, (int)0xCAFEBABE, 0x13579BDF, (int)0xABCDEF01, (int)0xCAFEBABE,
+ (int)0xDEADBEEF, (int)0xFF00FF00, (int)0xBEEFCAFE, 0x00000001),
+ (__mmask16)0xBEEF,
+ _mm512_setr_epi32(
+ (int)0xFACEB00C, (int)0x89ABCDEF, 0x55555555, (int)0xF0F0F0F0, 0x1337C0DE, 0x00000002,
+ 0x40000000, (int)0xBFFFFFFF, 0x00000000, 0x2468ACE0, 0x10FEDCBA, 0x00000000,
+ (int)0xFEEDFACE, 0x00FF00FF, 0x12345678, 0x00000002),
+ _mm512_setr_epi32(
+ 0x12345678, (int)0xFFFFFFFF, (int)0xCCCCCCCC, (int)0x88888888, (int)0xDEADC0DE, 0x00000004,
+ 0x20000000, (int)0xDFFFFFFF, (int)0xFFFFFFFF, (int)0xFEDCBA98, 0x55555555, (int)0xFFFFFFFF,
+ (int)0x8BADF00D, (int)0xF0F0F0F0, (int)0xFACEB00C, 0x00000003),
+ (unsigned char)0x96), // op: A ^ B ^ C (XOR3)
+ (int)0x3657589B, 0x77777777, 0x33333333, 0x77777777, (int)0xBAADF00D, 0x00000007,
+ (int)0xE0000000, 0x1FFFFFFF, (int)0xCAFEBABE, (int)0xC9E38DA7, (int)0xEE6666EE, 0x35014541,
+ (int)0xABEDB42C, 0x0F0F0F0F, (int)0xBEEFCAFE, 0x00000000));
__m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
@@ -6340,28 +6418,73 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i
}
TEST_CONSTEXPR(match_v16si(
_mm512_maskz_ternarylogic_epi32(
- (__mmask16)0x3333,
- ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA), // A ? B : C
- 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+ (__mmask16)0x6498,
+ ((__m512i)((__v16si){
+ 1393174638, 1243877629, -826208314, 1770837977,
+ -1678093555, -414088391, 1288769935, 703296098,
+ 1428104678, 405688910, -167788555, 1965219804,
+ -1959018749, 514303227, 754191429, 579811517})),
+ ((__m512i)((__v16si){
+ -1301280384, -923736510, -797648805, 475853364,
+ 1247377062, 213070102, 626020209, 2037794518,
+ 122183669, 1712787569, -1042441569, -1416844145,
+ 1374304252, -1323427639, 1432483217, 1621706359})),
+ ((__m512i)((__v16si){
+ 234227517, -313293475, 1851213039, -300885844,
+ -1479339544, 575183087, -655840260, -1853668117,
+ 433622095, 933629633, -1324904005, -68434060,
+ 486070655, 226865941, -1461464269, 1471789621})),
+ (unsigned char)0xAB), // C | (~A & ~B)
+ 0, 0, 0, -298592082,
+ -1479042568, 0, 0, -1752969749,
+ 0, 0, -1157115461, 0,
+ 0, 1304818453, -1427385541, 0));
TEST_CONSTEXPR(match_v16si(
_mm512_maskz_ternarylogic_epi32(
- (__mmask16)0xCCCC,
- ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE), // A | B | C
- 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+ (__mmask16)0xA593,
+ ((__m512i)((__v16si){
+ 1789368711, -1148107974, 43676229, -772343700,
+ -750732339, -846759705, 1282648386, -622176070,
+ 1582316135, -292982512, 831769172, 1160692746,
+ -1863980731, 1873586768, 694443915, 1602297017})),
+ ((__m512i)((__v16si){
+ 533722196, -540417103, 1859447115, 1121373020,
+ -1880858588, -1601100530, 748799881, -1667438068,
+ -1918900807, 802338724, 1524289825, -1124643462,
+ -180110385, -1147315996, 392105170, -2014156050})),
+ ((__m512i)((__v16si){
+ -91740526, -1136489150, 1770039327, -1782187112,
+ -2130581350, 715425150, 636282949, 883535681,
+ -1207026931, 334328971, 815920853, -699777824,
+ 1466990855, 1383146204, 101022850, 1970696522})),
+ (unsigned char)0x21), // (~B) & ~(A ^ C)
+ 1611661482, 539234310, 0, 0,
+ 538610824, 0, 0, 18874368,
+ 270539268, 0, -1543175586, 0,
+ 0, 1075980051, 0, 1342738432));
TEST_CONSTEXPR(match_v16si(
_mm512_maskz_ternarylogic_epi32(
- (__mmask16)0x5555,
- ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80), // A & B & C
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+ (__mmask16)0xC3A5,
+ ((__m512i)((__v16si){
+ 0x00000000, -0x1, (int)0x80000000, 0x7FFFFFFF,
+ (int)0xAAAAAAAA, 0x55555555, 0x00000001, (int)0xFFFFFFFE,
+ 0x0000FFFF, (int)0xFFFF0000, (int)0xDEADBEEF, (int)0xCAFEBABE,
+ 0x01234567, (int)0x89ABCDEF, 0x13579BDF, 0x2468ACE0})),
+ ((__m512i)((__v16si){
+ 0x2468ACE0, 0x13579BDF, (int)0x89ABCDEF, 0x01234567,
+ (int)0xCAFEBABE, (int)0xDEADBEEF, (int)0xFFFF0000, 0x0000FFFF,
+ (int)0xFFFFFFFE, 0x00000001, 0x55555555, (int)0xAAAAAAAA,
+ 0x7FFFFFFF, (int)0x80000000, -0x1, 0x00000000})),
+ ((__m512i)((__v16si){
+ -0x1, 0x00000000, -0x1, 0x00000000,
+ -0x1, 0x00000000, -0x1, 0x00000000,
+ -0x1, 0x00000000, -0x1, 0x00000000,
+ -0x1, 0x00000000, -0x1, 0x00000000})),
+ (unsigned char)0xC9), // F = (A & B) | (~A & ~(B ^ C))
+ 0x2468ACE0, 0x0, (int)0x89ABCDEF, 0x0,
+ 0x0, 0x74071445, 0x0, 0x0000FFFE,
+ (int)0xFFFFFFFE, 0x0000FFFE, 0x0, 0x0,
+ 0x0, 0x0, (int)0xFFFFFFFF, (int)0xDB97531F));
__m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_ternarylogic_epi64
@@ -6402,28 +6525,38 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __
}
TEST_CONSTEXPR(match_v8di(
_mm512_mask_ternarylogic_epi64(
- ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- (__mmask8)0x33,
- ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
-TEST_CONSTEXPR(match_v8di(
- _mm512_mask_ternarylogic_epi64(
- ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- (__mmask8)0xCC,
- ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+ ((__m512i)((__v8di){0x0LL, 0x1LL, 0x2LL, 0x3LL, 0x4LL, 0x5LL, 0x6LL, 0x7LL})),
+ (__mmask8)0xFF,
+ ((__m512i)((__v8di){0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL})),
+ ((__m512i)((__v8di){0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL})),
+ (unsigned char)0x96),
+ 0x1, 0x0, 0x3, 0x2, 0x5, 0x4, 0x7, 0x6));
TEST_CONSTEXPR(match_v8di(
_mm512_mask_ternarylogic_epi64(
- ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- (__mmask8)0x55,
- ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
+ ((__m512i)((__v8di){
+ (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL,
+ (long long)0x730E520285F4D01BULL, (long long)0x347E72CE341FD932ULL,
+ (long long)0x438F8D9BEA5D486FULL, (long long)0xFDB554A5DEEF750DULL,
+ (long long)0x0ABAA254BFFC2308ULL, (long long)0x825FE29BF1D51FC6ULL
+ })),
+ (__mmask8)0xE4,
+ ((__m512i)((__v8di){
+ (long long)0xC1779B12FA832A6EULL, (long long)0xCF6E876B587C4762ULL,
+ (long long)0x25DC09833D4ECA24ULL, (long long)0x34E55E25691BB80AULL,
+ (long long)0x9A02450CD8F20DD7ULL, (long long)0x78B9E240FB5B77A9ULL,
+ (long long)0xE1F37F76C1162596ULL, (long long)0xDCCB561738CE2941ULL
+ })),
+ ((__m512i)((__v8di){
+ (long long)0xD13840986BC8DC3CULL, (long long)0x34CDE7E8C960187EULL,
+ (long long)0x7EE068D9D111EEB8ULL, (long long)0xAD11149DE686B811ULL,
+ (long long)0x849F38BFD9AB0DFAULL, (long long)0x5C28948ED106227BULL,
+ (long long)0xFB1918D4A18E304DULL, (long long)0x4EDE6944F84AD59FULL
+ })),
+ (unsigned char)0x67),
+ (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL,
+ (long long)0xDB3DE57EEE5F25DCULL, (long long)0x347E72CE341FD932ULL,
+ (long long)0x438F8D9BEA5D486FULL, (long long)0x26D37FDE2A5DDDD2ULL,
+ (long long)0x1EEE67AB6099DDFBULL, (long long)0xB3353F73C6A4FCFEULL));
__m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64
@@ -6433,28 +6566,57 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _
}
TEST_CONSTEXPR(match_v8di(
_mm512_maskz_ternarylogic_epi64(
- (__mmask8)0x33,
- ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
-TEST_CONSTEXPR(match_v8di(
- _mm512_maskz_ternarylogic_epi64(
- (__mmask8)0xCC,
- ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+ (__mmask8)0x6D,
+ ((__m512i)((__v8di){
+ (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000,
+ (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555,
+ (long long)0x0123456789ABCDEF, (long long)0x1122334455667788,
+ (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000B, (long long)0x000000000000000C,
+ (long long)0x00000000FFFF0000, (long long)0x3333333333333333,
+ (long long)0x0FEDCBA987654321, (long long)0x1111111111111111,
+ (long long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000C, (long long)0x000000000000000B,
+ (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555,
+ (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF,
+ (long long)0x0000000000000001, (long long)0x2222222222222222
+ })),
+ (unsigned char)0x89),
+ (long long)0x0000000000000008, (long long)0x0000000000000000,
+ (long long)0xFF0F0000F0F00000, (long long)0x9999999999999999,
+ (long long)0x0000000000000000, (long long)0x9111111111111111,
+ (long long)0x8000000000000001, (long long)0x0000000000000000));
+
TEST_CONSTEXPR(match_v8di(
_mm512_maskz_ternarylogic_epi64(
- (__mmask8)0x55,
- ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+ (__mmask8)0x6D,
+ ((__m512i)((__v8di){
+ (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000,
+ (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555,
+ (long long)0x0123456789ABCDEF, (long long)0x1122334455667788,
+ (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000B, (long long)0x000000000000000C,
+ (long long)0x00000000FFFF0000, (long long)0x3333333333333333,
+ (long long)0x0FEDCBA987654321, (long long)0x1111111111111111,
+ (long long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222
+ })),
+ ((__m512i)((__v8di){
+ (long long)0x000000000000000C, (long long)0x000000000000000B,
+ (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555,
+ (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF,
+ (long long)0x0000000000000001, (long long)0x2222222222222222
+ })),
+ (unsigned char)0x29),
+ (long long)0x0000000000000004, (long long)0x0000000000000000,
+ (long long)0xFF0FF0F0F0F0F0F0, (long long)0xCCCCCCCCCCCCCCCC,
+ (long long)0x0000000000000000, (long long)0x8033225544776699,
+ (long long)0x8000000000000000, (long long)0x0000000000000000));
__m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_shuffle_f32x4
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index d786b5fb8ddbf..3104ba3cfd5aa 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -1,7 +1,8 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
-
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
#include <immintrin.h>
#include "builtin_test_helpers.h"
@@ -8360,25 +8361,25 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) {
}
TEST_CONSTEXPR(match_v4si(
_mm_ternarylogic_epi32(
- ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
- ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
- ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
- (unsigned char)0xCA),
- 0xB, 0xC, 0xB, 0xC));
+ ((__m128i)((__v4si){(int)0x7FFFFFFF, (int)0x80000000, (int)0xAAAAAAAA, 0x00000000})),
+ ((__m128i)((__v4si){0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xFFFFFFFF})),
+ ((__m128i)((__v4si){(int)0xCAFEBABE, 0x0F0F0F0F, (int)0xFFFFFFFF, 0x00000000})),
+ (unsigned char)0xCA), /* B ? (A | C) : (C & ~A) */
+ (int)0x80000000, (int)0x8F0F0F0F, 0x5775577D, 0x00000000));
TEST_CONSTEXPR(match_v4si(
_mm_ternarylogic_epi32(
- ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
- ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
- ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0xF, 0xF, 0xF, 0xF));
+ ((__m128i)((__v4si){0x12345678, (int)0x80000000, 0x00000000, (int)0xAAAAAAAA})),
+ ((__m128i)((__v4si){0x0000FFFF, 0x7FFFFFFF, 0x55555555, 0x00000000})),
+ ((__m128i)((__v4si){(int)0xF0F0F0F0, 0x00000001, 0x0F0F0F0F, 0x33333333})),
+ (unsigned char)0xFE), /* A | B | C */
+ (int)0xF2F4FFFF, (int)0xFFFFFFFF, 0x5F5F5F5F, (int)0xBBBBBBBB));
TEST_CONSTEXPR(match_v4si(
_mm_ternarylogic_epi32(
- ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
- ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
- ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0));
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0x12345678, (int)0x80000000, 0x0F0F0F0F})),
+ ((__m128i)((__v4si){0x00FF00FF, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0xF0F0F0F0})),
+ ((__m128i)((__v4si){0x0F0F0F0F, 0x00FF00FF, (int)0xFFFFFFFF, (int)0xFFFFFFFF})),
+ (unsigned char)0x80), /* A & B & C */
+ 0x000F000F, 0x00340078, 0x00000000, 0x00000000));
__m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_mask_ternarylogic_epi32
@@ -8417,57 +8418,30 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B,
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4);
}
-// B ? A : C imm = 0xE2 (Idx = (A<<2)|(B<<1)|C per VPTERNLOG)
TEST_CONSTEXPR(match_v4si(
_mm_maskz_ternarylogic_epi32(
(__mmask8)0x0B,
- ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})), // A
- ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})), // B
- ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})), // C
- (unsigned char)0xE2),
+ ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})),
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})),
+ ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})),
+ (unsigned char)0xE2), // B ? A : C
(int)0xDEADBEEF, (int)0xFFFFFFFF, 0, 0x2468ACE0));
- // ~(A & B) | ~(B & C) imm = 0x7F
TEST_CONSTEXPR(match_v4si(
_mm_maskz_ternarylogic_epi32(
(__mmask8)0x0C,
- ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})), // A
- ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})), // B
- ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})), // C
- (unsigned char)0x7F),
+ ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})),
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})),
+ ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})),
+ (unsigned char)0x7F), // ~(A & B) | ~(B & C)
0, 0, (int)0xFFFFFFFF, (int)0xAAAAAAAA));
- // ~A | ~B | C imm = 0xBF
TEST_CONSTEXPR(match_v4si(
_mm_maskz_ternarylogic_epi32(
(__mmask8)0x05,
- ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})), // A
- ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})), // B
- ((__m128i)((__v4si){0, 0, 0x0000000F, 0})), // C
- (unsigned char)0xBF),
+ ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})),
+ ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})),
+ ((__m128i)((__v4si){0, 0, 0x0000000F, 0})),
+ (unsigned char)0xBF), // ~A | ~B | C imm = 0xBF
(int)0xFFFFFFFF, 0, (int)0xFFFFA98F, 0));
-TEST_CONSTEXPR(match_v4si(
- _mm_maskz_ternarylogic_epi32(
- (__mmask8)0x03,
- ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
- ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
- ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
- (unsigned char)0xCA),
- 0xB, 0xC, 0x0, 0x0));
-TEST_CONSTEXPR(match_v4si(
- _mm_maskz_ternarylogic_epi32(
- (__mmask8)0x0C,
- ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
- ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
- ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0x0, 0x0, 0xF, 0xF));
-TEST_CONSTEXPR(match_v4si(
- _mm_maskz_ternarylogic_epi32(
- (__mmask8)0x05,
- ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
- ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
- ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0));
__m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_ternarylogic_epi32
@@ -8476,25 +8450,25 @@ __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) {
}
TEST_CONSTEXPR(match_v8si(
_mm256_ternarylogic_epi32(
- ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+ ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})),
+ ((__m256i)((__v8si){(int)0xDEADBEEF, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777})),
+ ((__m256i)((__v8si){(int)0xCAFEBABE, (int)0x88888888, (int)0x99999999, (int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xFFFFFFFF})),
+ (unsigned char)0xF0), /* A */
+ 0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA));
TEST_CONSTEXPR(match_v8si(
_mm256_ternarylogic_epi32(
- ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+ ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})),
+ ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xEEEEEEEE, (int)0xFFFFFFFF, 0x00000000, 0x11111111})),
+ ((__m256i)((__v8si){0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, (int)0x88888888, (int)0x99999999})),
+ (unsigned char)0x0F), /* ~A */
+ (int)0xEDCBA987, (int)0xFFFFFFFF, 0x00000000, (int)0x80000000, 0x7FFFFFFF, (int)0xFF00FF00, 0x0F0F0F0F, 0x55555555));
TEST_CONSTEXPR(match_v8si(
_mm256_ternarylogic_epi32(
- ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+ ((__m256i)((__v8si){0x0F0F0F0F, (int)0xAAAAAAAA, 0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x13579BDF, (int)0x80000000, 0x7FFFFFFF})),
+ ((__m256i)((__v8si){(int)0xF0F0F0F0, 0x55555555, 0x11111111, (int)0xFFFFFFFF, 0x00000000, 0x02468ACE, 0x7FFFFFFF, (int)0x80000000})),
+ ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA})),
+ (unsigned char)0x3C), /* A ^ B */
+ (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x03254769, (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, (int)0xFFFFFFFF));
__m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_ternarylogic_epi32
@@ -8504,28 +8478,28 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __
}
TEST_CONSTEXPR(match_v8si(
_mm256_mask_ternarylogic_epi32(
- ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
- (__mmask8)0x33,
- ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
- ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+ ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, (int)0x80000000, 0x13579BDF, 0x2468ACE0})),
+ (__mmask8)0xA5,
+ ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000})),
+ ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x55555555, 0x33333333, (int)0x89ABCDEF, 0x00000000, (int)0xFFFFFFFF, 0x11111111})),
+ (unsigned char)0xE2), /* B ? A : C */
+ (int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, 0x00000000, 0x13579BDF, 0x11111111));
TEST_CONSTEXPR(match_v8si(
_mm256_mask_ternarylogic_epi32(
- ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- (__mmask8)0xCC,
- ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+ ((__m256i)((__v8si){0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F})),
+ (__mmask8)0xFF,
+ ((__m256i)((__v8si){0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF})),
+ ((__m256i)((__v8si){0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333})),
+ (unsigned char)0x96), /* A ^ B ^ C */
+ 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3));
TEST_CONSTEXPR(match_v8si(
_mm256_mask_ternarylogic_epi32(
- ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- (__mmask8)0x55,
- ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
+ ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xAAAAAAAA, 0x55555555, (int)0x80000000, 0x7FFFFFFF})),
+ (__mmask8)0x5A,
+ ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, 0x55555555, (int)0xAAAAAAAA, (int)0x80000000, 0x7FFFFFFF})),
+ ((__m256i)((__v8si){0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000})),
+ (unsigned char)0xC0), /* A & B */
+ (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, 0x00000000, 0x55555555, (int)0x80000000, 0x7FFFFFFF));
__m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32
@@ -8535,28 +8509,28 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _
}
TEST_CONSTEXPR(match_v8si(
_mm256_maskz_ternarylogic_epi32(
- (__mmask8)0x33,
- ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ (__mmask8)0x6D,
+ ((__m256i)((__v8si){(int)-1, 0, (int)-1, 0, (int)-1, 0, (int)-1, 0})),
((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+ (unsigned char)0x30), /* A & ~B */
+ (int)0xFFFFFFF4, 0, (int)0xFFFFFFF4, 0, 0, 0, (int)0xFFFFFFF4, 0));
TEST_CONSTEXPR(match_v8si(
_mm256_maskz_ternarylogic_epi32(
- (__mmask8)0xCC,
+ (__mmask8)0x90,
((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+ (unsigned char)0x44), /* B & ~C */
+ 0, 0, 0, 0, 0x4, 0, 0, 0x4));
TEST_CONSTEXPR(match_v8si(
_mm256_maskz_ternarylogic_epi32(
- (__mmask8)0x55,
- ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
- ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ (__mmask8)0x0F,
+ ((__m256i)((__v8si){0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3})),
+ ((__m256i)((__v8si){0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1})),
((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+ (unsigned char)0x28), /* (A ^ B) & C */
+ 0x2, 0x2, 0x2, 0x2, 0, 0, 0, 0));
__m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_ternarylogic_epi64
@@ -8565,25 +8539,28 @@ __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) {
}
TEST_CONSTEXPR(match_v2di(
_mm_ternarylogic_epi64(
- ((__m128i)((__v2di){-0x1, 0x0})),
- ((__m128i)((__v2di){0xB, 0xB})),
- ((__m128i)((__v2di){0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC));
+ ((__m128i)((__v2di){ (long long)0xBB91433A6AA79987ULL, (long long)0xD1F6F86C029A7245ULL })),
+ ((__m128i)((__v2di){ (long long)0xCD8778E7D340BBCDULL, (long long)0xDAEA58BA4C73A942ULL })),
+ ((__m128i)((__v2di){ (long long)0xEE8971105E503A67ULL, (long long)0x452EC40A3193CA54ULL })),
+ (unsigned char)0x77), // F = ~(B & C)
+ (long long)0x337E8FFFADBFC5BAULL,
+ (long long)0xBFD5BFF5FFEC77BFULL));
TEST_CONSTEXPR(match_v2di(
_mm_ternarylogic_epi64(
- ((__m128i)((__v2di){0x9, 0x9})),
- ((__m128i)((__v2di){0x4, 0x4})),
- ((__m128i)((__v2di){0x2, 0x2})),
- (unsigned char)0xFE),
- 0xF, 0xF));
+ ((__m128i)((__v2di){ (long long)0x6FACAA5090E5E945ULL, (long long)0x5F811CB929645F8BULL })),
+ ((__m128i)((__v2di){ (long long)0xDFC9E3B11FCFF454ULL, (long long)0x42D6CB5C6ED4E94BULL })),
+ ((__m128i)((__v2di){ (long long)0xA091250E8FE46024ULL, (long long)0x9C9CEA0C2CA1C789ULL })),
+ (unsigned char)0xDD), // F = B | ~C
+ (long long)0xDFEFFBF17FDFFFDFULL,
+ (long long)0x63F7DFFFFFDEF97FULL));
TEST_CONSTEXPR(match_v2di(
_mm_ternarylogic_epi64(
- ((__m128i)((__v2di){0x9, 0x9})),
- ((__m128i)((__v2di){0x4, 0x4})),
- ((__m128i)((__v2di){0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0));
+ ((__m128i)((__v2di){ (long long)0x2FD2B7A48D9FE5B9ULL, (long long)0xBCF74D7A5ADAD121ULL })),
+ ((__m128i)((__v2di){ (long long)0xBB9D58E4F543BBCFULL, (long long)0x87F26AEE175F0CD2ULL })),
+ ((__m128i)((__v2di){ (long long)0xBC428D42FA882692ULL, (long long)0x95C5FB986980A81FULL })),
+ (unsigned char)0x22), // F = ~B & C
+ (long long)0x044285020A880410ULL,
+ (long long)0x100591106880A00DULL));
__m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_mask_ternarylogic_epi64
@@ -8593,28 +8570,38 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B,
}
TEST_CONSTEXPR(match_v2di(
_mm_mask_ternarylogic_epi64(
- ((__m128i)((__v2di){-0x1, 0x0})),
- (__mmask8)0x33,
- ((__m128i)((__v2di){0xB, 0xB})),
- ((__m128i)((__v2di){0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC));
+ ((__m128i)((__v2di){(long long)0xF4C3B00C0D15EA5ELL, (long long)0x0123456789ABCDE0LL})),
+ (__mmask8)0x9D,
+ ((__m128i)((__v2di){(long long)0x9A7F3C2155EE00DDLL, (long long)0xDEADBEEFCAFEBABELL})),
+ ((__m128i)((__v2di){(long long)0x00F0F0F0F0F0F0F0LL, (long long)0x13579BDF2468ACE0LL})),
+ (unsigned char)0xFF), // All 1s
+ (long long)-1,
+ (long long)0x0123456789ABCDE0LL));
TEST_CONSTEXPR(match_v2di(
_mm_mask_ternarylogic_epi64(
- ((__m128i)((__v2di){0x9, 0x9})),
- (__mmask8)0xCC,
- ((__m128i)((__v2di){0x4, 0x4})),
- ((__m128i)((__v2di){0x2, 0x2})),
- (unsigned char)0xFE),
- 0x9, 0x9));
+ ((__m128i)((__v2di){ (long long)0x3A7C19E54B20D8A1LL, (long long)0x4F12B39D0C85E762LL })),
+ (__mmask8)0xD2,
+ ((__m128i)((__v2di){ (long long)0x6D93A0F217C54E3BLL, (long long)0x24E1C7A95B08D6F2LL })),
+ ((__m128i)((__v2di){ (long long)0x5A0C3E19D472B8F5LL, (long long)0x0187D3B2C9E4056ALL })),
+ (unsigned char)0x00),
+ (long long)0x3A7C19E54B20D8A1LL,
+ (long long)0x0LL)); // All 0s
TEST_CONSTEXPR(match_v2di(
_mm_mask_ternarylogic_epi64(
- ((__m128i)((__v2di){0x9, 0x9})),
- (__mmask8)0x55,
- ((__m128i)((__v2di){0x4, 0x4})),
- ((__m128i)((__v2di){0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x9));
+ ((__m128i)((__v2di){
+ (long long)0xA3F10B6C7D8294E1ULL, (long long)0x19D4E7350AB2C98FLL
+ })),
+ (__mmask8)0xB5,
+ ((__m128i)((__v2di){
+ (long long)0x5C2E9A10F4B7D863LL, (long long)0x9B7E1D2C3A4F5E60LL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x2A6D3F81C9E047B5LL, (long long)0x7F0A1C3E5D2B6490LL
+ })),
+ (unsigned char)0x55), // ~C
+ (long long)0xD592C07E361FB84AULL,
+ (long long)0x19D4E7350AB2C98FLL
+));
__m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_ternarylogic_epi64
@@ -8624,28 +8611,50 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B,
}
TEST_CONSTEXPR(match_v2di(
_mm_maskz_ternarylogic_epi64(
- (__mmask8)0x03,
- ((__m128i)((__v2di){-0x1, 0x0})),
- ((__m128i)((__v2di){0xB, 0xB})),
- ((__m128i)((__v2di){0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC));
+ (__mmask8)0xA9,
+ ((__m128i)((__v2di){
+ (long long)0x8F3A5C7E21D4B690ULL, (long long)0x5AD02CE19B7F46A3ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0xC19E04B2A7D35F68ULL, (long long)0x2F7B93C4E1A05D76ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x7A0C1D2E3F405162ULL, (long long)0xD4E5F60718293A4BULL
+ })),
+ (unsigned char)0xD2), // F = C ? ~(A ^ B) : A
+ (long long)0xB53A457239D4B692ULL,
+ (long long)0x0ULL));
TEST_CONSTEXPR(match_v2di(
_mm_maskz_ternarylogic_epi64(
- (__mmask8)0x0C,
- ((__m128i)((__v2di){0x9, 0x9})),
- ((__m128i)((__v2di){0x4, 0x4})),
- ((__m128i)((__v2di){0x2, 0x2})),
- (unsigned char)0xFE),
- 0x0, 0x0));
+ (__mmask8)0xB6,
+ ((__m128i)((__v2di){
+ (long long)0x83C1D2E3F4051627ULL, (long long)0x5A0B1C2D3E4F6071ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x9E8D7C6B5A493827ULL, (long long)0x13579BDF2468ACE0ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x02468ACE13579BDFULL, (long long)0xFEDCBA9876543210ULL
+ })),
+ (unsigned char)0xFE), // F = A | B | C
+ (long long)0x0ULL,
+ (long long)0xFFDFBFFF7E7FFEF1ULL));
TEST_CONSTEXPR(match_v2di(
_mm_maskz_ternarylogic_epi64(
- (__mmask8)0x05,
- ((__m128i)((__v2di){0x9, 0x9})),
- ((__m128i)((__v2di){0x4, 0x4})),
- ((__m128i)((__v2di){0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0));
+ (__mmask8)0xA5,
+ ((__m128i)((__v2di){
+ (long long)0x1C80317FA3B1799DULL, (long long)0xBDD640FB06671AD1ULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x3EB13B9046685257ULL, (long long)0x23B8C1E9392456DEULL
+ })),
+ ((__m128i)((__v2di){
+ (long long)0x1A3D1FA7BC8960A9ULL, (long long)0xBD9C66B3AD3C2D6DULL
+ })),
+ (unsigned char)0x80), // F = A & B & C
+ (long long)0x1800110000004001ULL,
+ (long long)0x0ULL
+));
__m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_ternarylogic_epi64
@@ -8657,22 +8666,15 @@ TEST_CONSTEXPR(match_v4di(
((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, 0xB, 0xC));
+ (unsigned char)0x94),
+ (long long)-0x8, (long long)0x3, (long long)-0x8, (long long)0x3));
TEST_CONSTEXPR(match_v4di(
_mm256_ternarylogic_epi64(
((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0xF, 0xF, 0xF, 0xF));
-TEST_CONSTEXPR(match_v4di(
- _mm256_ternarylogic_epi64(
- ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
- ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0));
+ (unsigned char)0x76),
+ (long long)0xF, (long long)0xF, (long long)0xF, (long long)0xF));
__m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_ternarylogic_epi64
@@ -8682,28 +8684,44 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __
}
TEST_CONSTEXPR(match_v4di(
_mm256_mask_ternarylogic_epi64(
- ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
- (__mmask8)0x33,
- ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
- ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, -0x1, 0x0));
-TEST_CONSTEXPR(match_v4di(
- _mm256_mask_ternarylogic_epi64(
- ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
- (__mmask8)0xCC,
- ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0xFE),
- 0x9, 0x9, 0xF, 0xF));
+ ((__m256i)((__v4di){
+ (long long)0x0123456789ABCDEFULL, (long long)0x0F0F0F0F0F0F0F0FULL,
+ (long long)0xAAAAAAAAAAAAAAAALL, (long long)0x13579BDF02468ACEULL
+ })),
+ (__mmask8)0x09,
+ ((__m256i)((__v4di){
+ (long long)0x1111111111111111ULL, (long long)0x2222222222222222ULL,
+ (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x5555555555555555ULL, (long long)0x6666666666666666ULL,
+ (long long)0x7777777777777777ULL, (long long)0x8888888888888888ULL
+ })),
+ (unsigned char)0x12),
+ (long long)0x44660022CCEE88AAULL,
+ (long long)0x0F0F0F0F0F0F0F0FULL,
+ (long long)0xAAAAAAAAAAAAAAAALL,
+ (long long)0x9B9B13138A8A0202ULL));
TEST_CONSTEXPR(match_v4di(
_mm256_mask_ternarylogic_epi64(
- ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
- (__mmask8)0x55,
- ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x9, 0x0, 0x9));
+ ((__m256i)((__v4di){
+ (long long)0xDEADBEEFDEADBEEFULL, (long long)0xCAFEBABECAFEBABEULL,
+ (long long)0xF00DFACEF00DFACEULL, (long long)0x0123456789ABCDEFULL
+ })),
+ (__mmask8)0x06,
+ ((__m256i)((__v4di){
+ (long long)0x0000000000000000ULL, (long long)0xFFFFFFFFFFFFFFFFULL,
+ (long long)0x13579BDF13579BDFULL, (long long)0x0AAAAAAAAAAAAAAULL
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x1111111111111111ULL, (long long)0x2222222222222222ULL,
+ (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL
+ })),
+ (unsigned char)0x23),
+ (long long)0xDEADBEEFDEADBEEFULL,
+ (long long)0x0000000000000000ULL,
+ (long long)0x2CA024202CA02420ULL,
+ (long long)0x0123456789ABCDEFULL));
__m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64
@@ -8713,28 +8731,38 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _
}
TEST_CONSTEXPR(match_v4di(
_mm256_maskz_ternarylogic_epi64(
- (__mmask8)0x33,
- ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
- ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
- ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
- (unsigned char)0xCA),
- 0xB, 0xC, 0x0, 0x0));
-TEST_CONSTEXPR(match_v4di(
- _mm256_maskz_ternarylogic_epi64(
- (__mmask8)0xCC,
- ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
- ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+ (__mmask8)0x05,
+ ((__m256i)((__v4di){
+ (long long)0x1, (long long)0x2, (long long)0x0, (long long)0x7
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x3, (long long)0x4, (long long)0x0
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x5, (long long)0x0, (long long)0x1
+ })),
(unsigned char)0xFE),
- 0x0, 0x0, 0xF, 0xF));
+ (long long)0x1,
+ (long long)0x0,
+ (long long)0x4,
+ (long long)0x0));
TEST_CONSTEXPR(match_v4di(
_mm256_maskz_ternarylogic_epi64(
- (__mmask8)0x55,
- ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
- ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
- ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
- (unsigned char)0x80),
- 0x0, 0x0, 0x0, 0x0));
+ (__mmask8)0x0A,
+ ((__m256i)((__v4di){
+ (long long)0x1, (long long)0x0, (long long)0x2, (long long)0x1
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x1, (long long)0x0, (long long)0x0
+ })),
+ ((__m256i)((__v4di){
+ (long long)0x0, (long long)0x0, (long long)0x4, (long long)0x1
+ })),
+ (unsigned char)0xED),
+ (long long)0x0,
+ (long long)-0x1,
+ (long long)0x0,
+ (long long)-0x1));
__m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
// CHECK-LABEL: test_mm256_shuffle_f32x4
More information about the cfe-commits
mailing list