[clang] 0b1ef8c - [X86][Clang] Add AVX512 Integer Comparison Intrinsics for constexpr Evaluation (#164026)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 28 09:05:24 PDT 2025
Author: sskzakaria
Date: 2025-10-28T16:05:20Z
New Revision: 0b1ef8c6b29ec97f1613d033920b7f6276eaf2f4
URL: https://github.com/llvm/llvm-project/commit/0b1ef8c6b29ec97f1613d033920b7f6276eaf2f4
DIFF: https://github.com/llvm/llvm-project/commit/0b1ef8c6b29ec97f1613d033920b7f6276eaf2f4.diff
LOG: [X86][Clang] Add AVX512 Integer Comparison Intrinsics for constexpr Evaluation (#164026)
Enables constexpr evaluation for the following AVX512 Integer Comparison Intrinsics:
```
_mm_cmp_epi8_mask _mm_cmp_epu8_mask
_mm_cmp_epi16_mask _mm_cmp_epu16_mask
_mm_cmp_epi32_mask _mm_cmp_epu32_mask
_mm_cmp_epi64_mask _mm_cmp_epu64_mask
_mm256_cmp_epi8_mask _mm256_cmp_epu8_mask
_mm256_cmp_epi16_mask _mm256_cmp_epu16_mask
_mm256_cmp_epi32_mask _mm256_cmp_epu32_mask
_mm256_cmp_epi64_mask _mm256_cmp_epu64_mask
_mm512_cmp_epi8_mask _mm512_cmp_epu8_mask
_mm512_cmp_epi16_mask _mm512_cmp_epu16_mask
_mm512_cmp_epi32_mask _mm512_cmp_epu32_mask
_mm512_cmp_epi64_mask _mm512_cmp_epu64_mask
```
Part 1 of #162054
Added:
Modified:
clang/include/clang/Basic/BuiltinsX86.td
clang/lib/AST/ByteCode/InterpBuiltin.cpp
clang/lib/AST/ExprConstant.cpp
clang/lib/Headers/avx512vlbwintrin.h
clang/test/CodeGen/X86/avx512vlbw-builtins.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 0c85e280e748b..500aa85fe5356 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1282,81 +1282,99 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8f23001ea5a39..ab6b3ed1be0aa 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3296,6 +3296,60 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
return true;
}
+static bool evalICmpImm(uint8_t Imm, const APSInt &A, const APSInt &B,
+                        bool IsUnsigned) {
+  switch (Imm & 0x7) { // Only the low three bits select the predicate.
+  case 0x00: // _MM_CMPINT_EQ
+    return (A == B);
+  case 0x01: // _MM_CMPINT_LT
+    return IsUnsigned ? A.ult(B) : A.slt(B);
+  case 0x02: // _MM_CMPINT_LE
+    return IsUnsigned ? A.ule(B) : A.sle(B);
+  case 0x03: // _MM_CMPINT_FALSE
+    return false;
+  case 0x04: // _MM_CMPINT_NE
+    return (A != B);
+  case 0x05: // _MM_CMPINT_NLT (not less than, i.e. >=)
+    return IsUnsigned ? A.uge(B) : A.sge(B);
+  case 0x06: // _MM_CMPINT_NLE (not less or equal, i.e. >)
+    return IsUnsigned ? A.ugt(B) : A.sgt(B);
+  case 0x07: // _MM_CMPINT_TRUE
+    return true;
+  default: // Not reachable: the masked value is always in [0, 7].
+    llvm_unreachable("Invalid Op");
+  }
+}
+
+static bool interp__builtin_ia32_cmp_mask(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, unsigned ID, // ID is not read in the body.
+ bool IsUnsigned) {
+ assert(Call->getNumArgs() == 4); // (LHS, RHS, predicate imm, mask)
+ // Builds a bitmask with one bit per element of the vector comparison.
+ APSInt Mask = popToAPSInt(S, Call->getArg(3)); // Popped last-arg-first.
+ APSInt Opcode = popToAPSInt(S, Call->getArg(2));
+ unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+
+ assert(LHS.getNumElems() == RHS.getNumElems());
+ // The result is as wide as the element count and starts all-zero.
+ APInt RetMask = APInt::getZero(LHS.getNumElems());
+ unsigned VectorLen = LHS.getNumElems();
+ PrimType ElemT = LHS.getFieldDesc()->getPrimType();
+ // Result bit i = Mask bit i AND (LHS[i] <CmpOp> RHS[i]).
+ for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+ APSInt A, B;
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ A = LHS.elem<T>(ElemNum).toAPSInt();
+ B = RHS.elem<T>(ElemNum).toAPSInt();
+ });
+ RetMask.setBitVal(ElemNum,
+ Mask[ElemNum] && evalICmpImm(CmpOp, A, B, IsUnsigned));
+ }
+ pushInteger(S, RetMask, Call->getType()); // Pushed with the call's type.
+ return true;
+}
+
static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
assert(Call->getNumArgs() == 1);
@@ -4488,6 +4542,35 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
+ case X86::BI__builtin_ia32_cmpb128_mask:
+ case X86::BI__builtin_ia32_cmpw128_mask:
+ case X86::BI__builtin_ia32_cmpd128_mask:
+ case X86::BI__builtin_ia32_cmpq128_mask:
+ case X86::BI__builtin_ia32_cmpb256_mask:
+ case X86::BI__builtin_ia32_cmpw256_mask:
+ case X86::BI__builtin_ia32_cmpd256_mask:
+ case X86::BI__builtin_ia32_cmpq256_mask:
+ case X86::BI__builtin_ia32_cmpb512_mask:
+ case X86::BI__builtin_ia32_cmpw512_mask:
+ case X86::BI__builtin_ia32_cmpd512_mask:
+ case X86::BI__builtin_ia32_cmpq512_mask:
+ return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
+ /*IsUnsigned=*/false);
+
+ case X86::BI__builtin_ia32_ucmpb128_mask:
+ case X86::BI__builtin_ia32_ucmpw128_mask:
+ case X86::BI__builtin_ia32_ucmpd128_mask:
+ case X86::BI__builtin_ia32_ucmpq128_mask:
+ case X86::BI__builtin_ia32_ucmpb256_mask:
+ case X86::BI__builtin_ia32_ucmpw256_mask:
+ case X86::BI__builtin_ia32_ucmpd256_mask:
+ case X86::BI__builtin_ia32_ucmpq256_mask:
+ case X86::BI__builtin_ia32_ucmpb512_mask:
+ case X86::BI__builtin_ia32_ucmpw512_mask:
+ case X86::BI__builtin_ia32_ucmpd512_mask:
+ case X86::BI__builtin_ia32_ucmpq512_mask:
+ return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
+ /*IsUnsigned=*/true);
case X86::BI__builtin_ia32_pslldqi128_byteshift:
case X86::BI__builtin_ia32_pslldqi256_byteshift:
case X86::BI__builtin_ia32_pslldqi512_byteshift:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 29ee089505125..d0404b957ab03 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15766,6 +15766,89 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx).getInt(), E);
}
+
+  case clang::X86::BI__builtin_ia32_cmpb128_mask:
+  case clang::X86::BI__builtin_ia32_cmpw128_mask:
+  case clang::X86::BI__builtin_ia32_cmpd128_mask:
+  case clang::X86::BI__builtin_ia32_cmpq128_mask:
+  case clang::X86::BI__builtin_ia32_cmpb256_mask:
+  case clang::X86::BI__builtin_ia32_cmpw256_mask:
+  case clang::X86::BI__builtin_ia32_cmpd256_mask:
+  case clang::X86::BI__builtin_ia32_cmpq256_mask:
+  case clang::X86::BI__builtin_ia32_cmpb512_mask:
+  case clang::X86::BI__builtin_ia32_cmpw512_mask:
+  case clang::X86::BI__builtin_ia32_cmpd512_mask:
+  case clang::X86::BI__builtin_ia32_cmpq512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpb128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpw128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpd128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpq128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpb256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpw256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpd256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpq256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpb512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpw512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpd512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpq512_mask: {
+    assert(E->getNumArgs() == 4);
+    // NOTE(review): assumes the ucmp*_mask enumerators are contiguous and
+    // that ucmpw512_mask is the last of them (as in BuiltinsX86.td order).
+    bool IsUnsigned =
+        (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask &&
+         BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpw512_mask);
+    APValue LHS, RHS;
+    APSInt Mask, Opcode;
+    if (!EvaluateVector(E->getArg(0), LHS, Info) ||
+        !EvaluateVector(E->getArg(1), RHS, Info) ||
+        !EvaluateInteger(E->getArg(2), Opcode, Info) ||
+        !EvaluateInteger(E->getArg(3), Mask, Info))
+      return false;
+
+    assert(LHS.getVectorLength() == RHS.getVectorLength());
+
+    unsigned VectorLen = LHS.getVectorLength();
+    unsigned RetWidth = Mask.getBitWidth(); // Result is as wide as the mask.
+
+    APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
+    // Result bit i = Mask bit i AND (LHS[i] <predicate> RHS[i]).
+    for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+      const APSInt &A = LHS.getVectorElt(ElemNum).getInt();
+      const APSInt &B = RHS.getVectorElt(ElemNum).getInt();
+      bool Result = false;
+
+      switch (Opcode.getExtValue() & 0x7) { // Low three bits pick the op.
+      case 0: // _MM_CMPINT_EQ
+        Result = (A == B);
+        break;
+      case 1: // _MM_CMPINT_LT
+        Result = IsUnsigned ? A.ult(B) : A.slt(B);
+        break;
+      case 2: // _MM_CMPINT_LE
+        Result = IsUnsigned ? A.ule(B) : A.sle(B);
+        break;
+      case 3: // _MM_CMPINT_FALSE
+        Result = false;
+        break;
+      case 4: // _MM_CMPINT_NE
+        Result = (A != B);
+        break;
+      case 5: // _MM_CMPINT_NLT (>=)
+        Result = IsUnsigned ? A.uge(B) : A.sge(B);
+        break;
+      case 6: // _MM_CMPINT_NLE (>)
+        Result = IsUnsigned ? A.ugt(B) : A.sgt(B);
+        break;
+      case 7: // _MM_CMPINT_TRUE
+        Result = true;
+        break;
+      }
+
+      RetMask.setBitVal(ElemNum, Mask[ElemNum] && Result);
+    }
+
+    return Success(APValue(RetMask), E);
+  }
}
}
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 0fcfe3779fa19..263a1079b26d5 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2385,22 +2385,19 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
(__mmask32) __U);
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_test_epi8_mask (__m128i __A, __m128i __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_test_epi8_mask(__m128i __A, __m128i __B) {
return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
_mm_setzero_si128());
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
-_mm256_test_epi8_mask (__m256i __A, __m256i __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
_mm256_setzero_si256());
}
@@ -2439,9 +2436,8 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
_mm256_setzero_si256());
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_testn_epi8_mask (__m128i __A, __m128i __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
}
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 116d86fcd597d..febef46458ae9 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -645,6 +645,21 @@ __mmask16 test_mm_cmp_epi8_mask(__m128i __a, __m128i __b) {
return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 0);
}
+TEST_CONSTEXPR(_mm_cmpeq_epi8_mask(
+ ((__m128i)(__v16qi){5, 3, 7, 2, 9, 3, 7, 1, 5, 4, 8, 2, 9, 6, 7, 5}),
+ ((__m128i)(__v16qi){5, 2, 7, 3, 9, 4, 6, 1, 5, 3, 8, 1, 9, 5, 7, 5})
+) == (__mmask16)0xd595);
+
+TEST_CONSTEXPR(_mm_cmplt_epi8_mask(
+ ((__m128i)(__v16qi){1, 5, 3, 7, 2, 8, 4, 6, 9, 5, 3, 11, 2, 6, 15, 8}),
+ ((__m128i)(__v16qi){2, 4, 6, 8, 3, 5, 7, 9, 4, 6, 8, 10, 5, 7, 9, 11})
+) == (__mmask16)0xb6dd);
+
+TEST_CONSTEXPR(_mm_cmple_epi8_mask(
+ ((__m128i)(__v16qi){1, 3, 5, 7, 2, 6, 6, 8, 1, 3, 9, 7, 2, 4, 6, 10}),
+ ((__m128i)(__v16qi){2, 3, 4, 7, 3, 4, 5, 8, 2, 3, 4, 7, 3, 4, 5, 8})
+) == (__mmask16)0x3b9b);
+
__mmask16 test_mm_mask_cmp_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
// CHECK-LABEL: test_mm_mask_cmp_epi8_mask
// CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
@@ -2894,6 +2909,12 @@ __mmask16 test_mm_test_epi8_mask(__m128i __A, __m128i __B) {
return _mm_test_epi8_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm_test_epi8_mask(
+ (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask16)0xfffb);
+
__mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_test_epi8_mask
// CHECK: and <2 x i64> %{{.*}}, %{{.*}}
@@ -2901,6 +2922,12 @@ __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm_mask_test_epi8_mask(__U, __A, __B);
}
+TEST_CONSTEXPR(_mm_mask_test_epi8_mask(
+ 0xFFFF,
+ (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask16)0xfffb);
__mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_test_epi8_mask
@@ -2908,6 +2935,11 @@ __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
// CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}}
return _mm256_test_epi8_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm256_test_epi8_mask(
+ (__m256i)(__v32qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m256i)(__v32qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask32)0xfffbfffb);
__mmask32 test_mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_test_epi8_mask
@@ -2954,6 +2986,12 @@ __mmask16 test_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
return _mm_testn_epi8_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm_testn_epi8_mask(
+ (__m128i)(__v16qi){1, 2, 77, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 16, 16},
+ (__m128i)(__v16qi){2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15}
+)
+== (__mmask16)0xe001);
+
__mmask16 test_mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_testn_epi8_mask
// CHECK: and <2 x i64> %{{.*}}, %{{.*}}
More information about the cfe-commits
mailing list