[clang] 456ca91 - [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 VPSHUFBITQMB intrinsics to be used in constexpr (#168100)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Nov 22 09:19:32 PST 2025
Author: NagaChaitanya Vellanki
Date: 2025-11-22T17:19:27Z
New Revision: 456ca91815c3fdb60b5ca695c8bb05b75016a343
URL: https://github.com/llvm/llvm-project/commit/456ca91815c3fdb60b5ca695c8bb05b75016a343
DIFF: https://github.com/llvm/llvm-project/commit/456ca91815c3fdb60b5ca695c8bb05b75016a343.diff
LOG: [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 VPSHUFBITQMB intrinsics to be used in constexpr (#168100)
Resolves #161337
Added:
Modified:
clang/include/clang/Basic/BuiltinsX86.td
clang/lib/AST/ByteCode/InterpBuiltin.cpp
clang/lib/AST/ExprConstant.cpp
clang/lib/Headers/avx512bitalgintrin.h
clang/lib/Headers/avx512vlbitalgintrin.h
clang/test/CodeGen/X86/avx512bitalg-builtins.c
clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3c9fbd912ceaf..f6069fdc5707a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1366,15 +1366,15 @@ let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
}
-let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpshufbitqmb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, unsigned short)">;
}
-let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpshufbitqmb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, unsigned int)">;
}
-let Features = "avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 27eb6c5c698f2..83e40f64fd979 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3468,6 +3468,69 @@ static bool interp__builtin_ia32_shuffle_generic(
return true;
}
+static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+
+ assert(Call->getNumArgs() == 3);
+
+ QualType SourceType = Call->getArg(0)->getType();
+ QualType ShuffleMaskType = Call->getArg(1)->getType();
+ QualType ZeroMaskType = Call->getArg(2)->getType();
+ if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
+ !ZeroMaskType->isIntegerType()) {
+ return false;
+ }
+
+ Pointer Source, ShuffleMask;
+ APSInt ZeroMask = popToAPSInt(S, Call->getArg(2));
+ ShuffleMask = S.Stk.pop<Pointer>();
+ Source = S.Stk.pop<Pointer>();
+
+ const auto *SourceVecT = SourceType->castAs<VectorType>();
+ const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
+ assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
+ assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());
+
+ PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType());
+ PrimType ShuffleMaskElemT =
+ *S.getContext().classify(ShuffleMaskVecT->getElementType());
+
+ unsigned NumBytesInQWord = 8;
+ unsigned NumBitsInByte = 8;
+ unsigned NumBytes = SourceVecT->getNumElements();
+ unsigned NumQWords = NumBytes / NumBytesInQWord;
+ unsigned RetWidth = ZeroMask.getBitWidth();
+ APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
+
+ for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
+ APInt SourceQWord(64, 0);
+ for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+ uint64_t Byte = 0;
+ INT_TYPE_SWITCH(SourceElemT, {
+ Byte = static_cast<uint64_t>(
+ Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
+ });
+ SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
+ }
+
+ for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+ unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
+ unsigned M = 0;
+ INT_TYPE_SWITCH(ShuffleMaskElemT, {
+ M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F;
+ });
+
+ if (ZeroMask[SelIdx]) {
+ RetMask.setBitVal(SelIdx, SourceQWord[M]);
+ }
+ }
+ }
+
+ pushInteger(S, RetMask, Call->getType());
+
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4868,6 +4931,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ucmpq512_mask:
return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
/*IsUnsigned=*/true);
+
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+ return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);
+
case X86::BI__builtin_ia32_pslldqi128_byteshift:
case X86::BI__builtin_ia32_pslldqi256_byteshift:
case X86::BI__builtin_ia32_pslldqi512_byteshift:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ad1f49ce9b04e..3b91678f7d400 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16762,6 +16762,48 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(APValue(RetMask), E);
}
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
+ assert(E->getNumArgs() == 3);
+
+ APValue Source, ShuffleMask;
+ APSInt ZeroMask;
+ if (!EvaluateVector(E->getArg(0), Source, Info) ||
+ !EvaluateVector(E->getArg(1), ShuffleMask, Info) ||
+ !EvaluateInteger(E->getArg(2), ZeroMask, Info))
+ return false;
+
+ assert(Source.getVectorLength() == ShuffleMask.getVectorLength());
+ assert(ZeroMask.getBitWidth() == Source.getVectorLength());
+
+ unsigned NumBytesInQWord = 8;
+ unsigned NumBitsInByte = 8;
+ unsigned NumBytes = Source.getVectorLength();
+ unsigned NumQWords = NumBytes / NumBytesInQWord;
+ unsigned RetWidth = ZeroMask.getBitWidth();
+ APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
+
+ for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
+ APInt SourceQWord(64, 0);
+ for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+ uint64_t Byte = Source.getVectorElt(QWordId * NumBytesInQWord + ByteIdx)
+ .getInt()
+ .getZExtValue();
+ SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
+ }
+
+ for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+ unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
+ unsigned M =
+ ShuffleMask.getVectorElt(SelIdx).getInt().getZExtValue() & 0x3F;
+ if (ZeroMask[SelIdx]) {
+ RetMask.setBitVal(SelIdx, SourceQWord[M]);
+ }
+ }
+ }
+ return Success(APValue(RetMask), E);
+ }
}
}
diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index 98197e468370d..f5e9b1a84fcfe 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -15,44 +15,42 @@
#define __AVX512BITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \
- __min_vector_width__(512)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+ __min_vector_width__(512))) constexpr
#else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \
+ __min_vector_width__(512)))
#endif
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi16(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) {
return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(
(__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi8(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) {
return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(
(__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B);
}
@@ -74,6 +72,4 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
}
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
-
#endif
diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h
index 1874adce4ea6e..edfb9c1e1f241 100644
--- a/clang/lib/Headers/avx512vlbitalgintrin.h
+++ b/clang/lib/Headers/avx512vlbitalgintrin.h
@@ -15,6 +15,16 @@
#define __AVX512VLBITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bitalg"), \
+ __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512bitalg"), \
+ __min_vector_width__(256))) constexpr
+#else
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg"), \
@@ -23,75 +33,66 @@
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg"), \
__min_vector_width__(256)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#else
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A) {
return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256(
(__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi16(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) {
return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128(
(__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi8(__m256i __A) {
return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(
(__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi8(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) {
return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(
(__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B);
}
@@ -131,7 +132,4 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
-#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
-#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
-
#endif
diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
index 3ac8674421d93..7d524ab156500 100644
--- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
@@ -70,4 +70,14 @@ __mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.512
return _mm512_bitshuffle_epi64_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm512_bitshuffle_epi64_mask(
+ (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
+ 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
+ 0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010155ff0101ULL);
+TEST_CONSTEXPR(_mm512_mask_bitshuffle_epi64_mask(0xFFFFFFFF00000000ULL,
+ (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
+ 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
+ 0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010100000000ULL);
diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
index e0b55c6fde81a..d19d2c28f3649 100644
--- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
@@ -116,6 +116,13 @@ __mmask32 test_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.256
return _mm256_bitshuffle_epi64_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm256_bitshuffle_epi64_mask(
+ (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0101);
+
+TEST_CONSTEXPR(_mm256_mask_bitshuffle_epi64_mask(0xFFFF0000,
+ (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
+ (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0000);
__mmask16 test_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_bitshuffle_epi64_mask
@@ -129,4 +136,11 @@ __mmask16 test_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.128
return _mm_bitshuffle_epi64_mask(__A, __B);
}
+TEST_CONSTEXPR(_mm_bitshuffle_epi64_mask(
+ (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
+ (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0101);
+
+TEST_CONSTEXPR(_mm_mask_bitshuffle_epi64_mask(0xFF00,
+ (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
+ (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0100);
More information about the cfe-commits
mailing list