[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow PSLL/PSRA/PSRL var intrinsics to be used in constexpr (PR #169276)
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 27 06:33:04 PST 2025
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/169276
>From f5662479038d0943e07690047526c8df0bfa4a09 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Sun, 23 Nov 2025 18:45:35 -0800
Subject: [PATCH 1/5] [Clang] VectorExprEvaluator::VisitCallExpr /
InterpretBuiltin - Allow PSLL/PSRA/PSRL var intrinsics to be used in
constexpr
Resolves:#169176
---
clang/include/clang/Basic/BuiltinsX86.td | 60 ++++------
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 105 +++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 106 +++++++++++++++++
clang/lib/Headers/avx2intrin.h | 40 +++----
clang/lib/Headers/avx512bwintrin.h | 45 +++----
clang/lib/Headers/avx512fintrin.h | 90 ++++++--------
clang/lib/Headers/avx512vlintrin.h | 130 +++++++++------------
clang/lib/Headers/emmintrin.h | 32 ++---
clang/test/CodeGen/X86/avx2-builtins.c | 34 ++++++
clang/test/CodeGen/X86/avx512bw-builtins.c | 27 ++++-
clang/test/CodeGen/X86/avx512f-builtins.c | 54 +++++++--
clang/test/CodeGen/X86/avx512vl-builtins.c | 41 +++++--
clang/test/CodeGen/X86/sse2-builtins.c | 16 +++
13 files changed, 520 insertions(+), 260 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e82d7a8f62d8c..98cea35beb0ea 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -210,17 +210,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
def _mm_pause : X86LibBuiltin<"void()">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
@@ -261,6 +250,15 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
+
+ def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}
let Features = "sse3", Attributes = [NoThrow] in {
@@ -579,14 +577,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psadbw256
: X86Builtin<
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
- def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
- def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
@@ -663,6 +653,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+
+ def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
+ def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1926,16 +1925,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
-}
-
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
+ def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
@@ -1991,7 +1987,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psraw512
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psrlw512
@@ -2308,25 +2304,17 @@ let Features = "avx512f",
def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl",
- Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
}
-let Features = "avx512vl",
- Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8874d9e3f6222..d21f42d94d3a5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3468,6 +3468,69 @@ static bool interp__builtin_ia32_shuffle_generic(
return true;
}
+static bool interp__builtin_ia32_shift_with_count(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
+ llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
+
+ assert(Call->getNumArgs() == 2);
+
+ const Pointer &Count = S.Stk.pop<Pointer>();
+ const Pointer &Source = S.Stk.pop<Pointer>();
+
+ QualType SourceType = Call->getArg(0)->getType();
+ QualType CountType = Call->getArg(1)->getType();
+ assert(SourceType->isVectorType() && CountType->isVectorType());
+
+ const auto *SourceVecT = SourceType->castAs<VectorType>();
+ const auto *CountVecT = CountType->castAs<VectorType>();
+ PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType());
+ PrimType CountElemT = *S.getContext().classify(CountVecT->getElementType());
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ unsigned DestEltWidth =
+ S.getASTContext().getTypeSize(SourceVecT->getElementType());
+ bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
+ unsigned DestLen = SourceVecT->getNumElements();
+ unsigned CountEltWidth =
+ S.getASTContext().getTypeSize(CountVecT->getElementType());
+ unsigned NumBitsInQWord = 64;
+ unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
+
+ uint64_t CountLQWord = 0;
+ for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
+ uint64_t Elt = 0;
+ INT_TYPE_SWITCH(CountElemT,
+ { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); });
+ CountLQWord |= (Elt << (EltIdx * CountEltWidth));
+ }
+
+ for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
+ APSInt Elt;
+ INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });
+
+ APInt Result;
+ if (CountLQWord < DestEltWidth) {
+ Result = ShiftOp(Elt, CountLQWord);
+ } else {
+ Result = OverflowOp(Elt, DestEltWidth);
+ }
+ if (IsDestUnsigned) {
+ INT_TYPE_SWITCH(SourceElemT, {
+ Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
+ });
+ } else {
+ INT_TYPE_SWITCH(SourceElemT, {
+ Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
+ });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
@@ -4971,6 +5034,48 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
+ case X86::BI__builtin_ia32_psraq128:
+ case X86::BI__builtin_ia32_psraq256:
+ case X86::BI__builtin_ia32_psraq512:
+ case X86::BI__builtin_ia32_psrad128:
+ case X86::BI__builtin_ia32_psrad256:
+ case X86::BI__builtin_ia32_psrad512:
+ case X86::BI__builtin_ia32_psraw128:
+ case X86::BI__builtin_ia32_psraw256:
+ case X86::BI__builtin_ia32_psraw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
+ [](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); });
+
+ case X86::BI__builtin_ia32_psllq128:
+ case X86::BI__builtin_ia32_psllq256:
+ case X86::BI__builtin_ia32_psllq512:
+ case X86::BI__builtin_ia32_pslld128:
+ case X86::BI__builtin_ia32_pslld256:
+ case X86::BI__builtin_ia32_pslld512:
+ case X86::BI__builtin_ia32_psllw128:
+ case X86::BI__builtin_ia32_psllw256:
+ case X86::BI__builtin_ia32_psllw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
+ [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });
+
+ case X86::BI__builtin_ia32_psrlq128:
+ case X86::BI__builtin_ia32_psrlq256:
+ case X86::BI__builtin_ia32_psrlq512:
+ case X86::BI__builtin_ia32_psrld128:
+ case X86::BI__builtin_ia32_psrld256:
+ case X86::BI__builtin_ia32_psrld512:
+ case X86::BI__builtin_ia32_psrlw128:
+ case X86::BI__builtin_ia32_psrlw256:
+ case X86::BI__builtin_ia32_psrlw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
+ [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7e2fa7debdd87..7b9380de6834d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12166,6 +12166,52 @@ static bool evalShuffleGeneric(
return true;
}
+static bool evalShiftWithCount(
+ EvalInfo &Info, const CallExpr *Call, APValue &Out,
+ llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
+ llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
+
+ APValue Source, Count;
+ if (!EvaluateAsRValue(Info, Call->getArg(0), Source) ||
+ !EvaluateAsRValue(Info, Call->getArg(1), Count))
+ return false;
+
+ assert(Call->getNumArgs() == 2);
+
+ QualType SourceTy = Call->getArg(0)->getType();
+ QualType CountTy = Call->getArg(1)->getType();
+ assert(SourceTy->isVectorType() && CountTy->isVectorType());
+
+ QualType DestEltTy = SourceTy->castAs<VectorType>()->getElementType();
+ unsigned DestEltWidth = Source.getVectorElt(0).getInt().getBitWidth();
+ unsigned DestLen = Source.getVectorLength();
+ bool IsDestUnsigned = DestEltTy->isUnsignedIntegerType();
+ unsigned CountEltWidth = Count.getVectorElt(0).getInt().getBitWidth();
+ unsigned NumBitsInQWord = 64;
+ unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
+ SmallVector<APValue, 64> Result;
+ Result.reserve(DestLen);
+
+ uint64_t CountLQWord = 0;
+ for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
+ uint64_t Elt = Count.getVectorElt(EltIdx).getInt().getZExtValue();
+ CountLQWord |= (Elt << (EltIdx * CountEltWidth));
+ }
+
+ for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
+ APInt Elt = Source.getVectorElt(EltIdx).getInt();
+ if (CountLQWord < DestEltWidth) {
+ Result.push_back(
+ APValue(APSInt(ShiftOp(Elt, CountLQWord), IsDestUnsigned)));
+ } else {
+ Result.push_back(
+ APValue(APSInt(OverflowOp(Elt, DestEltWidth), IsDestUnsigned)));
+ }
+ }
+ Out = APValue(Result.data(), Result.size());
+ return true;
+}
+
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -13169,6 +13215,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(Result.data(), Result.size()), E);
}
+ case X86::BI__builtin_ia32_psraq128:
+ case X86::BI__builtin_ia32_psraq256:
+ case X86::BI__builtin_ia32_psraq512:
+ case X86::BI__builtin_ia32_psrad128:
+ case X86::BI__builtin_ia32_psrad256:
+ case X86::BI__builtin_ia32_psrad512:
+ case X86::BI__builtin_ia32_psraw128:
+ case X86::BI__builtin_ia32_psraw256:
+ case X86::BI__builtin_ia32_psraw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return Elt.ashr(Width - 1);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_psllq128:
+ case X86::BI__builtin_ia32_psllq256:
+ case X86::BI__builtin_ia32_psllq512:
+ case X86::BI__builtin_ia32_pslld128:
+ case X86::BI__builtin_ia32_pslld256:
+ case X86::BI__builtin_ia32_pslld512:
+ case X86::BI__builtin_ia32_psllw128:
+ case X86::BI__builtin_ia32_psllw256:
+ case X86::BI__builtin_ia32_psllw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return APInt::getZero(Width);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_psrlq128:
+ case X86::BI__builtin_ia32_psrlq256:
+ case X86::BI__builtin_ia32_psrlq512:
+ case X86::BI__builtin_ia32_psrld128:
+ case X86::BI__builtin_ia32_psrld256:
+ case X86::BI__builtin_ia32_psrld512:
+ case X86::BI__builtin_ia32_psrlw128:
+ case X86::BI__builtin_ia32_psrlw256:
+ case X86::BI__builtin_ia32_psrlw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return APInt::getZero(Width);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3e3c13d8bd662..d3ceb2327ac62 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -2095,9 +2095,8 @@ _mm256_slli_epi16(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2134,9 +2133,8 @@ _mm256_slli_epi32(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
}
@@ -2173,9 +2171,8 @@ _mm256_slli_epi64(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi64(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi64(__m256i __a, __m128i __count) {
return __builtin_ia32_psllq256((__v4di)__a, __count);
}
@@ -2214,9 +2211,8 @@ _mm256_srai_epi16(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2255,9 +2251,8 @@ _mm256_srai_epi32(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
}
@@ -2336,9 +2331,8 @@ _mm256_srli_epi16(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srl_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2375,9 +2369,8 @@ _mm256_srli_epi32(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srl_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
}
@@ -2414,9 +2407,8 @@ _mm256_srli_epi64(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi64(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srl_epi64(__m256i __a, __m128i __count) {
return __builtin_ia32_psrlq256((__v4di)__a, __count);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 53d641e9e2eae..67e8461560b04 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1383,23 +1383,20 @@ _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sll_epi16(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sll_epi16(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_sll_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_sll_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
@@ -1473,23 +1470,20 @@ _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sra_epi16(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sra_epi16(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_sra_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_sra_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
@@ -1515,23 +1509,20 @@ _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) {
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srl_epi16(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srl_epi16(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_srl_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_srl_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e1de56069870b..806a13c414c10 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5382,45 +5382,39 @@ _mm512_kmov (__mmask16 __A)
((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sll_epi32(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i
+ __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_sll_epi32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_sll_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sll_epi64(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sll_epi64(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_sll_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_sll_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -5467,45 +5461,39 @@ _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sra_epi32(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sra_epi32(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_sra_epi32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_sra_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sra_epi64(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sra_epi64(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_sra_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_sra_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
@@ -5552,45 +5540,39 @@ _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srl_epi32(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srl_epi32(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_srl_epi32(__A, __B),
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_srl_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srl_epi64(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srl_epi64(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_srl_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_srl_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 99c057030a4cc..388f99d812312 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -4299,33 +4299,29 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
(__v4di)_mm256_ror_epi64((a), (b)), \
(__v4di)_mm256_setzero_si256()))
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_sll_epi32(__A, __B),
(__v4si)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_sll_epi32(__A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_sll_epi32(__A, __B),
(__v8si)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_sll_epi32(__A, __B),
(__v8si)_mm256_setzero_si256());
@@ -4362,33 +4358,29 @@ _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_sll_epi64(__A, __B),
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_sll_epi64(__A, __B),
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_sll_epi64(__A, __B),
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_sll_epi64(__A, __B),
(__v4di)_mm256_setzero_si256());
@@ -4641,33 +4633,29 @@ _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_srl_epi32(__A, __B),
(__v4si)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_srl_epi32(__A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_srl_epi32(__A, __B),
(__v8si)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_srl_epi32(__A, __B),
(__v8si)_mm256_setzero_si256());
@@ -4704,33 +4692,29 @@ _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_srl_epi64(__A, __B),
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_srl_epi64(__A, __B),
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_srl_epi64(__A, __B),
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_srl_epi64(__A, __B),
(__v4di)_mm256_setzero_si256());
@@ -6127,33 +6111,29 @@ _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
(__v4di)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_sra_epi32(__A, __B),
(__v4si)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_sra_epi32(__A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_sra_epi32(__A, __B),
(__v8si)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_sra_epi32(__A, __B),
(__v8si)_mm256_setzero_si256());
@@ -6188,45 +6168,39 @@ _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_sra_epi64(__m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_sra_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
(__v2di)_mm_sra_epi64(__A, __B), \
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
(__v2di)_mm_sra_epi64(__A, __B), \
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi64(__m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi64(__m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
(__v4di)_mm256_sra_epi64(__A, __B), \
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
(__v4di)_mm256_sra_epi64(__A, __B), \
(__v4di)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 7eb15e06db698..9d71c69878e47 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2782,8 +2782,8 @@ _mm_slli_epi16(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to left-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sll_epi16(__m128i __a, __m128i __count) {
return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
}
@@ -2818,8 +2818,8 @@ _mm_slli_epi32(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to left-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sll_epi32(__m128i __a, __m128i __count) {
return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
}
@@ -2854,8 +2854,8 @@ _mm_slli_epi64(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to left-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sll_epi64(__m128i __a, __m128i __count) {
return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
}
@@ -2892,8 +2892,8 @@ _mm_srai_epi16(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sra_epi16(__m128i __a, __m128i __count) {
return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
}
@@ -2930,8 +2930,8 @@ _mm_srai_epi32(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sra_epi32(__m128i __a, __m128i __count) {
return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
}
@@ -2991,8 +2991,8 @@ _mm_srli_epi16(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srl_epi16(__m128i __a, __m128i __count) {
return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
}
@@ -3027,8 +3027,8 @@ _mm_srli_epi32(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srl_epi32(__m128i __a, __m128i __count) {
return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
}
@@ -3063,8 +3063,8 @@ _mm_srli_epi64(__m128i __a, int __count) {
/// A 128-bit integer vector in which bits [63:0] specify the number of bits
/// to right-shift each value in operand \a __a.
/// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a,
- __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srl_epi64(__m128i __a, __m128i __count) {
return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
}
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 6a884e98e9f3b..398bf0c90105f 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1191,6 +1191,30 @@ __m256i test_mm256_sign_epi32(__m256i a, __m256i b) {
}
TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed, -1,2,-3,4}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, 1,-2,3,-4));
+__m256i test_mm256_sll_epi16(__m256i a, __m128i b) {
+ // CHECK-LABEL: test_mm256_sll_epi16
+ // CHECK: call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ return _mm256_sll_epi16(a, b);
+}
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+
+__m256i test_mm256_sll_epi32(__m256i a, __m128i b) {
+ // CHECK-LABEL: test_mm256_sll_epi32
+ // CHECK: call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ return _mm256_sll_epi32(a, b);
+}
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14));
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+
+__m256i test_mm256_sll_epi64(__m256i a, __m128i b) {
+ // CHECK-LABEL: test_mm256_sll_epi64
+ // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ return _mm256_sll_epi64(a, b);
+}
+TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 2, 4, 6));
+TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0));
+
__m256i test_mm256_slli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_slli_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %{{.*}}, i32 %{{.*}})
@@ -1283,12 +1307,16 @@ __m256i test_mm256_sra_epi16(__m256i a, __m128i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm256_sra_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
__m256i test_mm256_sra_epi32(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_sra_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm256_sra_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){1, 0, 0, 0}), -16384, 16383, -2, -1, -1, 0, 0, 1));
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){32, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0));
__m256i test_mm256_srai_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_srai_epi16
@@ -1335,18 +1363,24 @@ __m256i test_mm256_srl_epi16(__m256i a, __m128i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm256_srl_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m256i test_mm256_srl_epi32(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_srl_epi32
// CHECK:call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm256_srl_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 1, 1, 2, 2, 3, 3));
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
__m256i test_mm256_srl_epi64(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_srl_epi64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm256_srl_epi64(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 0, 1, 1));
+TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0));
__m256i test_mm256_srli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_srli_epi16
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 091d0c40f0ffa..5a653cb3fa0a3 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -2297,8 +2297,11 @@ TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sllv_epi16(0xB120676B, (__m512i)(__v32hi
__m512i test_mm512_sll_epi16(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sll_epi16
// CHECK: @llvm.x86.avx512.psll.w.512
- return _mm512_sll_epi16(__A, __B);
+ return _mm512_sll_epi16(__A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi16
@@ -2311,8 +2314,10 @@ __m512i test_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sll_epi16
// CHECK: @llvm.x86.avx512.psll.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
- return _mm512_maskz_sll_epi16(__U, __A, __B);
+ return _mm512_maskz_sll_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_sll_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A5A5A, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 99, 2, 99, 6, 8, 99, 12, 99, 99, 18, 99, 22, 24, 99, 28, 99, 99, 34, 99, 38, 40, 99, 44, 99, 99, 50, 99, 54, 56, 99, 60, 99));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sll_epi16(0xA5A5A5A5, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 4, 0, 0, 10, 0, 14, 16, 0, 20, 0, 0, 26, 0, 30, 32, 0, 36, 0, 0, 42, 0, 46, 48, 0, 52, 0, 0, 58, 0, 62));
__m512i test_mm512_slli_epi16(__m512i __A) {
// CHECK-LABEL: test_mm512_slli_epi16
@@ -2422,8 +2427,11 @@ TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srav_epi16(0xB120676B, (__m512i)(__v32hi
__m512i test_mm512_sra_epi16(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sra_epi16
// CHECK: @llvm.x86.avx512.psra.w.512
- return _mm512_sra_epi16(__A, __B);
+ return _mm512_sra_epi16(__A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
__m512i test_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi16
@@ -2436,8 +2444,10 @@ __m512i test_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sra_epi16
// CHECK: @llvm.x86.avx512.psra.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
- return _mm512_maskz_sra_epi16(__U, __A, __B);
+ return _mm512_maskz_sra_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_sra_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A5A5A, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 99, -1, 99, -3, 4, 99, 6, 99, 99, -9, 99, -11, 12, 99, 14, 99, 99, -17, 99, -19, 20, 99, 22, 99, 99, -25, 99, -27, 28, 99, 30, 99));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sra_epi16(0xA5A5A5A5, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 2, 0, 0, -5, 0, -7, 8, 0, 10, 0, 0, -13, 0, -15, 16, 0, 18, 0, 0, -21, 0, -23, 24, 0, 26, 0, 0, -29, 0, -31));
__m512i test_mm512_srai_epi16(__m512i __A) {
// CHECK-LABEL: test_mm512_srai_epi16
@@ -2485,8 +2495,11 @@ __m512i test_mm512_maskz_srai_epi16_2(__mmask32 __U, __m512i __A, unsigned int _
__m512i test_mm512_srl_epi16(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_srl_epi16
// CHECK: @llvm.x86.avx512.psrl.w.512
- return _mm512_srl_epi16(__A, __B);
+ return _mm512_srl_epi16(__A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 32767, 2, 32765, 4, 32763, 6, 32761, 8, 32759, 10, 32757, 12, 32755, 14, 32753, 16, 32751, 18, 32749, 20, 32747, 22, 32745, 24, 32743, 26, 32741, 28, 32739, 30, 32737));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi16
@@ -2499,8 +2512,10 @@ __m512i test_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_srl_epi16
// CHECK: @llvm.x86.avx512.psrl.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
- return _mm512_maskz_srl_epi16(__U, __A, __B);
+ return _mm512_maskz_srl_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_srl_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A5A5A, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 99, 32767, 99, 32765, 4, 99, 6, 99, 99, 32759, 99, 32757, 12, 99, 14, 99, 99, 32751, 99, 32749, 20, 99, 22, 99, 99, 32743, 99, 32741, 28, 99, 30, 99));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_srl_epi16(0xA5A5A5A5, (__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 2, 0, 0, 32763, 0, 32761, 8, 0, 10, 0, 0, 32755, 0, 32753, 16, 0, 18, 0, 0, 32747, 0, 32745, 24, 0, 26, 0, 0, 32739, 0, 32737));
__m512i test_mm512_srli_epi16(__m512i __A) {
// CHECK-LABEL: test_mm512_srli_epi16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 753b4e48c20ed..9b1dae2228eb4 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6282,8 +6282,11 @@ __m512i test_mm512_maskz_srai_epi64_2(__mmask8 __U, __m512i __A, unsigned int __
__m512i test_mm512_sll_epi32(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sll_epi32
// CHECK: @llvm.x86.avx512.psll.d.512
- return _mm512_sll_epi32(__A, __B);
+ return _mm512_sll_epi32(__A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){33, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi32
@@ -6296,14 +6299,19 @@ __m512i test_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sll_epi32
// CHECK: @llvm.x86.avx512.psll.d.512
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- return _mm512_maskz_sll_epi32(__U, __A, __B);
+ return _mm512_maskz_sll_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_sll_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2, 99, 6, 8, 99, 12, 99, 99, 18, 99, 22, 24, 99, 28, 99));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_sll_epi32(0xA5A5, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 4, 0, 0, 10, 0, 14, 16, 0, 20, 0, 0, 26, 0, 30));
__m512i test_mm512_sll_epi64(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sll_epi64
// CHECK: @llvm.x86.avx512.psll.q.512
- return _mm512_sll_epi64(__A, __B);
+ return _mm512_sll_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 0}), 0, 2, 4, 6, 8, 10, 12, 14));
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){65, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi64
@@ -6316,8 +6324,10 @@ __m512i test_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sll_epi64
// CHECK: @llvm.x86.avx512.psll.q.512
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_sll_epi64(__U, __A, __B);
+ return _mm512_maskz_sll_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_sll_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 0}), 99, 2, 99, 6, 8, 99, 12, 99));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_sll_epi64(0xA5, (__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 0}), 0, 0, 4, 0, 0, 10, 0, 14));
__m512i test_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_sllv_epi32
@@ -6368,8 +6378,11 @@ TEST_CONSTEXPR(match_v8di(_mm512_maskz_sllv_epi64(0xE4, (__m512i)(__v8di){ 16, -
__m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sra_epi32
// CHECK: @llvm.x86.avx512.psra.d.512
- return _mm512_sra_epi32(__A, __B);
+ return _mm512_sra_epi32(__A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){32, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){33, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
__m512i test_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi32
@@ -6382,14 +6395,19 @@ __m512i test_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sra_epi32
// CHECK: @llvm.x86.avx512.psra.d.512
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- return _mm512_maskz_sra_epi32(__U, __A, __B);
+ return _mm512_maskz_sra_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_sra_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 99, -1, 99, -3, 4, 99, 6, 99, 99, -9, 99, -11, 12, 99, 14, 99));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_sra_epi32(0xA5A5, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, -5, 0, -7, 8, 0, 10, 0, 0, -13, 0, -15));
__m512i test_mm512_sra_epi64(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.512
- return _mm512_sra_epi64(__A, __B);
+ return _mm512_sra_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, -1, 2, -3, 4, -5, 6, -7));
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){64, 0}), 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){65, 0}), 0, -1, 0, -1, 0, -1, 0, -1));
__m512i test_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi64
@@ -6402,8 +6420,10 @@ __m512i test_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.512
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_sra_epi64(__U, __A, __B);
+ return _mm512_maskz_sra_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_sra_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 99, -1, 99, -3, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_sra_epi64(0xA5, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, 0, 2, 0, 0, -5, 0, -7));
__m512i test_mm512_srav_epi32(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_srav_epi32
@@ -6454,8 +6474,11 @@ TEST_CONSTEXPR(match_v8di(_mm512_maskz_srav_epi64(0xE4, (__m512i)(__v8di){ 16, -
__m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_srl_epi32
// CHECK: @llvm.x86.avx512.psrl.d.512
- return _mm512_srl_epi32(__A, __B);
+ return _mm512_srl_epi32(__A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2147483647, 2, 2147483645, 4, 2147483643, 6, 2147483641, 8, 2147483639, 10, 2147483637, 12, 2147483635, 14, 2147483633));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){33, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi32
@@ -6468,14 +6491,19 @@ __m512i test_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_srl_epi32
// CHECK: @llvm.x86.avx512.psrl.d.512
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
- return _mm512_maskz_srl_epi32(__U, __A, __B);
+ return _mm512_maskz_srl_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_srl_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A5A, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2147483647, 99, 2147483645, 4, 99, 6, 99, 99, 2147483639, 99, 2147483637, 12, 99, 14, 99));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_srl_epi32(0xA5A5, (__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, 2147483643, 0, 2147483641, 8, 0, 10, 0, 0, 2147483635, 0, 2147483633));
__m512i test_mm512_srl_epi64(__m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_srl_epi64
// CHECK: @llvm.x86.avx512.psrl.q.512
- return _mm512_srl_epi64(__A, __B);
+ return _mm512_srl_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, 9223372036854775807, 2, 9223372036854775805, 4, 9223372036854775803, 6, 9223372036854775801));
+TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){65, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi64
@@ -6488,8 +6516,10 @@ __m512i test_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_maskz_srl_epi64
// CHECK: @llvm.x86.avx512.psrl.q.512
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_srl_epi64(__U, __A, __B);
+ return _mm512_maskz_srl_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_srl_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 99, 9223372036854775807, 99, 9223372036854775805, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_srl_epi64(0xA5, (__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, 0, 2, 0, 0, 9223372036854775803, 0, 9223372036854775801));
__m512i test_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_srlv_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index e1878bd84b1ad..5f6d8360888f5 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -6746,8 +6746,12 @@ __m256i test_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_srl_epi32
// CHECK: @llvm.x86.avx2.psrl.d
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
- return _mm256_maskz_srl_epi32(__U, __A, __B);
+ return _mm256_maskz_srl_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_srl_epi32((__m128i)(__v4si){99, 99, 99, 99}, 0x5, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), 2147483647, 99, 2147483645, 99));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_srl_epi32(0xA, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 0, 4));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_srl_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2147483647, 99, 2147483645, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_srl_epi32(0xA5, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, 2147483643, 0, 2147483641));
__m128i test_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_srli_epi32
@@ -6831,8 +6835,12 @@ __m256i test_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_srl_epi64
// CHECK: @llvm.x86.avx2.psrl.q
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_srl_epi64(__U, __A, __B);
+ return _mm256_maskz_srl_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_srl_epi64((__m128i)(__v2di){99, 99}, 0x1, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), 9223372036854775807, 99));
+TEST_CONSTEXPR(match_v2di(_mm_maskz_srl_epi64(0x2, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), 0, 2));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_srl_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0x5, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, 99, 2, 99));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_srl_epi64(0xA, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, 9223372036854775807, 0, 9223372036854775805));
__m128i test_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_srli_epi64
@@ -6917,8 +6925,12 @@ __m256i test_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sll_epi32
// CHECK: @llvm.x86.avx2.psll.d
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
- return _mm256_maskz_sll_epi32(__U, __A, __B);
+ return _mm256_maskz_sll_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_sll_epi32((__m128i)(__v4si){99, 99, 99, 99}, 0x5, (__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 2, 99, 6, 99));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_sll_epi32(0xA, (__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 4, 0, 8));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_sll_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 99, 2, 99, 6, 8, 99, 12, 99));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_sll_epi32(0xA5, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 4, 0, 0, 10, 0, 14));
__m128i test_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_slli_epi32
@@ -7007,8 +7019,12 @@ __m256i test_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sll_epi64
// CHECK: @llvm.x86.avx2.psll.q
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_sll_epi64(__U, __A, __B);
+ return _mm256_maskz_sll_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_sll_epi64((__m128i)(__v2di){99, 99}, 0x1, (__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 0}), 2, 99));
+TEST_CONSTEXPR(match_v2di(_mm_maskz_sll_epi64(0x2, (__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 0}), 0, 4));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_sll_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0x5, (__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 99, 4, 99));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_sll_epi64(0xA, (__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 2, 0, 6));
__m128i test_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_slli_epi64
@@ -8478,8 +8494,12 @@ __m256i test_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sra_epi32
// CHECK: @llvm.x86.avx2.psra.d
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
- return _mm256_maskz_sra_epi32(__U, __A, __B);
+ return _mm256_maskz_sra_epi32(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_sra_epi32((__m128i)(__v4si){99, 99, 99, 99}, 0x5, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), -1, 99, -3, 99));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_sra_epi32(0xA, (__m128i)(__v4si){-2, 4, -6, 8}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 0, 4));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_sra_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0x5A, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 99, -1, 99, -3, 4, 99, 6, 99));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_sra_epi32(0xA5, (__m256i)(__v8si){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 2, 0, 0, -5, 0, -7));
__m128i test_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_srai_epi32
@@ -8542,8 +8562,10 @@ __m256i test_mm256_maskz_srai_epi32_2(__mmask8 __U, __m256i __A, unsigned int __
__m128i test_mm_sra_epi64(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.128
- return _mm_sra_epi64(__A, __B);
+ return _mm_sra_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_sra_epi64((__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), -1, 2));
+TEST_CONSTEXPR(match_v2di(_mm_sra_epi64((__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){64, 0}), -1, 0));
__m128i test_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_sra_epi64
@@ -8576,8 +8598,13 @@ __m256i test_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: test_mm256_maskz_sra_epi64
// CHECK: @llvm.x86.avx512.psra.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_sra_epi64(__U, __A, __B);
+ return _mm256_maskz_sra_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_sra_epi64((__m128i)(__v2di){99, 99}, 0x1, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), -1, 99));
+TEST_CONSTEXPR(match_v2di(_mm_maskz_sra_epi64(0x2, (__m128i)(__v2di){-2, 4}, (__m128i)(__v2di){1, 0}), 0, 2));
+TEST_CONSTEXPR(match_v4di(_mm256_sra_epi64((__m256i)(__v4di){-2, 4, -6, 8}, (__m128i)(__v2di){1, 0}), -1, 2, -3, 4));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_sra_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0x5, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, 99, 2, 99));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_sra_epi64(0xA, (__m256i)(__v4di){0, -2, 4, -6}, (__m128i)(__v2di){1, 0}), 0, -1, 0, -3));
__m128i test_mm_srai_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_srai_epi64
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index efe8930fae336..c24e8f8fdf9b2 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1336,18 +1336,24 @@ __m128i test_mm_sll_epi16(__m128i A, __m128i B) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_sll_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 2, 4, 6, 8, 10, 12, 14, 16));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sll_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sll_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 2, 4, 6, 8));
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0));
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sll_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_sll_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 0}), 2, 4));
+TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){64, 0}), 0, 0));
__m128i test_mm_slli_epi16(__m128i A) {
// CHECK-LABEL: test_mm_slli_epi16
@@ -1452,12 +1458,16 @@ __m128i test_mm_sra_epi16(__m128i A, __m128i B) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_sra_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), -8, 8, -4, 4, -2, 2, -1, 1));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sra_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sra_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){1, 0, 0, 0}), -8, 8, -4, 4));
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){32, 0, 0, 0}), -1, 0, -1, 0));
__m128i test_mm_srai_epi16(__m128i A) {
// CHECK-LABEL: test_mm_srai_epi16
@@ -1503,18 +1513,24 @@ __m128i test_mm_srl_epi16(__m128i A, __m128i B) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_srl_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hu(_mm_srl_epi16((__m128i)(__v8hu){0x8000, 16, 8, 4, 2, 1, 0, 0xFFFF}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0x4000, 8, 4, 2, 1, 0, 0, 0x7FFF));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_srl_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_srl_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4su(_mm_srl_epi32((__m128i)(__v4su){0x80000000, 16, 8, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 0x40000000, 8, 4, 2));
+TEST_CONSTEXPR(match_v4si(_mm_srl_epi32((__m128i)(__v4si){-1, 16, 8, 4}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0));
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_srl_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_srl_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2du(_mm_srl_epi64((__m128i)(__v2du){0x8000000000000000ULL, 16}, (__m128i)(__v2di){1, 0}), 0x4000000000000000ULL, 8));
+TEST_CONSTEXPR(match_v2di(_mm_srl_epi64((__m128i)(__v2di){-1, 16}, (__m128i)(__v2di){64, 0}), 0, 0));
__m128i test_mm_srli_epi16(__m128i A) {
// CHECK-LABEL: test_mm_srli_epi16
>From 607f1ea60b57b347f2f09198e6ce9de1ec8b8100 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Mon, 24 Nov 2025 13:59:28 -0800
Subject: [PATCH 2/5] Make MMX shift intrinsics constexpr _mm_sll_pi16
_mm_sll_pi32 _mm_sll_si64 _mm_sra_pi16 _mm_sra_pi32 _mm_srl_pi16 _mm_srl_pi32
_mm_srl_si64
Also use _zext128 instead of _anyext128 to avoid negative indices.
---
clang/lib/Headers/mmintrin.h | 332 ++++++++++++--------------
clang/test/CodeGen/X86/mmx-builtins.c | 16 ++
2 files changed, 169 insertions(+), 179 deletions(-)
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index aca78e6986ad9..c2a02b1959e1d 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -39,14 +39,14 @@ typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_SSE2 \
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
- __min_vector_width__(128)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
+ __min_vector_width__(128))) constexpr
#else
-#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
+#define __DEFAULT_FN_ATTRS_SSE2 \
+ __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
+ __min_vector_width__(128)))
#endif
#define __trunc64(x) \
@@ -82,10 +82,8 @@ static __inline__ void
/// A 32-bit integer value.
/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
/// parameter. The upper 32 bits are set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtsi32_si64(int __i)
-{
- return __extension__ (__m64)(__v2si){__i, 0};
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i) {
+ return __extension__(__m64)(__v2si){__i, 0};
}
/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
@@ -99,10 +97,8 @@ _mm_cvtsi32_si64(int __i)
/// A 64-bit integer vector.
/// \returns A 32-bit signed integer value containing the lower 32 bits of the
/// parameter.
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtsi64_si32(__m64 __m)
-{
- return ((__v2si)__m)[0];
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m) {
+ return ((__v2si)__m)[0];
}
/// Casts a 64-bit signed integer value into a 64-bit integer vector.
@@ -115,10 +111,8 @@ _mm_cvtsi64_si32(__m64 __m)
/// A 64-bit signed integer.
/// \returns A 64-bit integer vector containing the same bitwise pattern as the
/// parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtsi64_m64(long long __i)
-{
- return __extension__ (__m64)(__v1di){__i};
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i) {
+ return __extension__(__m64)(__v1di){__i};
}
/// Casts a 64-bit integer vector into a 64-bit signed integer value.
@@ -131,10 +125,8 @@ _mm_cvtsi64_m64(long long __i)
/// A 64-bit integer vector.
/// \returns A 64-bit signed integer containing the same bitwise pattern as the
/// parameter.
-static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cvtm64_si64(__m64 __m)
-{
- return ((__v1di)__m)[0];
+static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m) {
+ return ((__v1di)__m)[0];
}
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -156,8 +148,8 @@ _mm_cvtm64_si64(__m64 __m)
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_packs_pi16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1,
+ __m64 __m2) {
return __trunc64(__builtin_ia32_packsswb128(
(__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
}
@@ -181,8 +173,8 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2) {
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_packs_pi32(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1,
+ __m64 __m2) {
return __trunc64(__builtin_ia32_packssdw128(
(__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
}
@@ -206,8 +198,8 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2) {
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_packs_pu16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1,
+ __m64 __m2) {
return __trunc64(__builtin_ia32_packuswb128(
(__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
}
@@ -233,8 +225,8 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2) {
/// Bits [63:56] are written to bits [63:56] of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5,
13, 6, 14, 7, 15);
}
@@ -256,8 +248,8 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
/// Bits [63:48] are written to bits [63:48] of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7);
}
@@ -276,8 +268,8 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
/// the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
}
@@ -302,8 +294,8 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
/// Bits [31:24] are written to bits [63:56] of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9,
2, 10, 3, 11);
}
@@ -325,8 +317,8 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
/// Bits [31:16] are written to bits [63:48] of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5);
}
@@ -345,8 +337,8 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
/// the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
}
@@ -365,10 +357,9 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_add_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
}
/// Adds each 16-bit integer element of the first 64-bit integer vector
@@ -386,10 +377,9 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_add_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
}
/// Adds each 32-bit integer element of the first 64-bit integer vector
@@ -407,10 +397,9 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_add_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
}
/// Adds, with saturation, each 8-bit signed integer element of the first
@@ -431,8 +420,8 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_adds_pi8(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
}
@@ -454,8 +443,8 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_adds_pi16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
}
@@ -476,8 +465,8 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_adds_pu8(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
}
@@ -498,8 +487,8 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_adds_pu16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
}
@@ -518,10 +507,9 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_sub_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
}
/// Subtracts each 16-bit integer element of the second 64-bit integer
@@ -539,10 +527,9 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_sub_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
}
/// Subtracts each 32-bit integer element of the second 64-bit integer
@@ -560,10 +547,9 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_sub_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
}
/// Subtracts, with saturation, each 8-bit signed integer element of the second
@@ -584,8 +570,8 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_subs_pi8(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
}
@@ -607,8 +593,8 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_subs_pi16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
}
@@ -630,8 +616,8 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_subs_pu8(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
}
@@ -653,8 +639,8 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_subs_pu16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1,
+ __m64 __m2) {
return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
}
@@ -679,8 +665,8 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
/// products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_madd_pi16(__m64 __m1, __m64 __m2) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1,
+ __m64 __m2) {
return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__zext128(__m1),
(__v8hi)__zext128(__m2)));
}
@@ -700,11 +686,10 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2) {
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
-{
- return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1),
- (__v8hi)__zext128(__m2)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1,
+ __m64 __m2) {
+ return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1),
+ (__v8hi)__zext128(__m2)));
}
/// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -722,10 +707,9 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_mullo_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
}
/// Left-shifts each 16-bit signed integer element of the first
@@ -748,8 +732,8 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2)
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sll_pi16(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m),
- (__v8hi)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_psllw128((__v8hi)__zext128(__m),
+ (__v8hi)__zext128(__count)));
}
/// Left-shifts each 16-bit signed integer element of a 64-bit integer
@@ -768,8 +752,8 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
/// values. If \a __count is greater or equal to 16, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_slli_pi16(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count));
}
@@ -793,8 +777,8 @@ _mm_slli_pi16(__m64 __m, int __count) {
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sll_pi32(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m),
- (__v4si)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_pslld128((__v4si)__zext128(__m),
+ (__v4si)__zext128(__count)));
}
/// Left-shifts each 32-bit signed integer element of a 64-bit integer
@@ -813,8 +797,8 @@ _mm_sll_pi32(__m64 __m, __m64 __count)
/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
/// values. If \a __count is greater or equal to 32, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_slli_pi32(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count));
}
@@ -835,8 +819,8 @@ _mm_slli_pi32(__m64 __m, int __count) {
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sll_si64(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m),
- (__v2di)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_psllq128((__v2di)__zext128(__m),
+ (__v2di)__zext128(__count)));
}
/// Left-shifts the first parameter, which is a 64-bit integer, by the
@@ -853,8 +837,8 @@ _mm_sll_si64(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the left-shifted value. If
/// \a __count is greater or equal to 64, the result is set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_slli_si64(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count));
}
@@ -879,8 +863,8 @@ _mm_slli_si64(__m64 __m, int __count) {
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sra_pi16(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m),
- (__v8hi)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_psraw128((__v8hi)__zext128(__m),
+ (__v8hi)__zext128(__count)));
}
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
@@ -900,8 +884,8 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_srai_pi16(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count));
}
@@ -926,8 +910,8 @@ _mm_srai_pi16(__m64 __m, int __count) {
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_sra_pi32(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m),
- (__v4si)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_psrad128((__v4si)__zext128(__m),
+ (__v4si)__zext128(__count)));
}
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
@@ -947,8 +931,8 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_srai_pi32(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count));
}
@@ -972,8 +956,8 @@ _mm_srai_pi32(__m64 __m, int __count) {
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srl_pi16(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m),
- (__v8hi)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_psrlw128((__v8hi)__zext128(__m),
+ (__v8hi)__zext128(__count)));
}
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
@@ -992,8 +976,8 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_srli_pi16(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count));
}
@@ -1017,8 +1001,8 @@ _mm_srli_pi16(__m64 __m, int __count) {
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srl_pi32(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m),
- (__v4si)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_psrld128((__v4si)__zext128(__m),
+ (__v4si)__zext128(__count)));
}
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
@@ -1037,8 +1021,8 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_srli_pi32(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count));
}
@@ -1059,8 +1043,8 @@ _mm_srli_pi32(__m64 __m, int __count) {
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_srl_si64(__m64 __m, __m64 __count)
{
- return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m),
- (__v2di)__anyext128(__count)));
+ return __trunc64(__builtin_ia32_psrlq128((__v2di)__zext128(__m),
+ (__v2di)__zext128(__count)));
}
/// Right-shifts the first parameter, which is a 64-bit integer, by the
@@ -1078,8 +1062,8 @@ _mm_srl_si64(__m64 __m, __m64 __count)
/// \param __count
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_srli_si64(__m64 __m, int __count) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m,
+ int __count) {
return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count));
}
@@ -1095,10 +1079,9 @@ _mm_srli_si64(__m64 __m, int __count) {
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_and_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
}
/// Performs a bitwise NOT of the first 64-bit integer vector, and then
@@ -1116,10 +1099,9 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of the second
/// parameter and the one's complement of the first parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_andnot_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
}
/// Performs a bitwise OR of two 64-bit integer vectors.
@@ -1134,10 +1116,9 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_or_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
}
/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
@@ -1152,10 +1133,9 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_xor_si64(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
}
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1174,10 +1154,9 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
}
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
@@ -1196,10 +1175,9 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
}
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1218,10 +1196,9 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
}
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1240,9 +1217,8 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1,
+ __m64 __m2) {
/* This function always performs a signed comparison, but __v8qi is a char
which may be signed or unsigned, so use __v8qs. */
return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
@@ -1264,10 +1240,9 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
}
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1286,10 +1261,9 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)((__v2si)__m1 > (__v2si)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1,
+ __m64 __m2) {
+ return (__m64)((__v2si)__m1 > (__v2si)__m2);
}
/// Constructs a 64-bit integer vector initialized to zero.
@@ -1299,8 +1273,7 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
/// This intrinsic corresponds to the <c> PXOR </c> instruction.
///
/// \returns An initialized 64-bit integer vector with all elements set to zero.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_setzero_si64(void) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setzero_si64(void) {
return __extension__(__m64){0LL};
}
@@ -1319,8 +1292,8 @@ _mm_setzero_si64(void) {
/// A 32-bit integer value used to initialize the lower 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set_pi32(int __i1, int __i0) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi32(int __i1,
+ int __i0) {
return __extension__(__m64)(__v2si){__i0, __i1};
}
@@ -1341,8 +1314,10 @@ _mm_set_pi32(int __i1, int __i0) {
/// \param __s0
/// A 16-bit integer value used to initialize bits [15:0] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi16(short __s3,
+ short __s2,
+ short __s1,
+ short __s0) {
return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3};
}
@@ -1371,7 +1346,7 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) {
/// \param __b0
/// An 8-bit integer value used to initialize bits [7:0] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
char __b1, char __b0) {
return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3,
@@ -1391,8 +1366,7 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
/// A 32-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [2 x i32].
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set1_pi32(int __i) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi32(int __i) {
return _mm_set_pi32(__i, __i);
}
@@ -1409,8 +1383,7 @@ _mm_set1_pi32(int __i) {
/// A 16-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [4 x i16].
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set1_pi16(short __w) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi16(short __w) {
return _mm_set_pi16(__w, __w, __w, __w);
}
@@ -1426,8 +1399,7 @@ _mm_set1_pi16(short __w) {
/// An 8-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [8 x i8].
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set1_pi8(char __b) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi8(char __b) {
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}
@@ -1446,8 +1418,8 @@ _mm_set1_pi8(char __b) {
/// A 32-bit integer value used to initialize the upper 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_setr_pi32(int __i0, int __i1) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi32(int __i0,
+ int __i1) {
return _mm_set_pi32(__i1, __i0);
}
@@ -1468,8 +1440,10 @@ _mm_setr_pi32(int __i0, int __i1) {
/// \param __w3
/// A 16-bit integer value used to initialize bits [63:48] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi16(short __w0,
+ short __w1,
+ short __w2,
+ short __w3) {
return _mm_set_pi16(__w3, __w2, __w1, __w0);
}
@@ -1498,7 +1472,7 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
/// \param __b7
/// An 8-bit integer value used to initialize bits [63:56] of the result.
/// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7) {
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index ad8a81c61ad43..37d6306ecdb7d 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -632,18 +632,24 @@ __m64 test_mm_sll_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(
return _mm_sll_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sll_pi16((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){1, 0, 0, 0}), 2, 4, 6, 8));
+TEST_CONSTEXPR(match_v4hi(_mm_sll_pi16((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){16, 0, 0, 0}), 0, 0, 0, 0));
__m64 test_mm_sll_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_pi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(
return _mm_sll_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sll_pi32((__m64)(__v2si){1, 2}, (__m64)(__v2si){1, 0}), 2, 4));
+TEST_CONSTEXPR(match_v2si(_mm_sll_pi32((__m64)(__v2si){1, 2}, (__m64)(__v2si){32, 0}), 0, 0));
__m64 test_mm_sll_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_si64
// CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(
return _mm_sll_si64(a, b);
}
+TEST_CONSTEXPR(match_v1di(_mm_sll_si64((__m64)(__v1di){1}, (__m64)(__v1di){1}), 2));
+TEST_CONSTEXPR(match_v1di(_mm_sll_si64((__m64)(__v1di){1}, (__m64)(__v1di){64}), 0));
__m64 test_mm_slli_pi16(__m64 a) {
// CHECK-LABEL: test_mm_slli_pi16
@@ -685,12 +691,16 @@ __m64 test_mm_sra_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(
return _mm_sra_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sra_pi16((__m64)(__v4hi){-16, 16, -8, 8}, (__m64)(__v4hi){1, 0, 0, 0}), -8, 8, -4, 4));
+TEST_CONSTEXPR(match_v4hi(_mm_sra_pi16((__m64)(__v4hi){-16, 16, -8, 8}, (__m64)(__v4hi){16, 0, 0, 0}), -1, 0, -1, 0));
__m64 test_mm_sra_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sra_pi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(
return _mm_sra_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sra_pi32((__m64)(__v2si){-16, 16}, (__m64)(__v2si){1, 0}), -8, 8));
+TEST_CONSTEXPR(match_v2si(_mm_sra_pi32((__m64)(__v2si){-16, 16}, (__m64)(__v2si){32, 0}), -1, 0));
__m64 test_mm_srai_pi16(__m64 a) {
// CHECK-LABEL: test_mm_srai_pi16
@@ -722,18 +732,24 @@ __m64 test_mm_srl_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(
return _mm_srl_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hu(_mm_srl_pi16((__m64)(__v4hu){0x8000, 16, 8, 4}, (__m64)(__v4hi){1, 0, 0, 0}), 0x4000, 8, 4, 2));
+TEST_CONSTEXPR(match_v4hi(_mm_srl_pi16((__m64)(__v4hi){-1, 16, 8, 4}, (__m64)(__v4hi){16, 0, 0, 0}), 0, 0, 0, 0));
__m64 test_mm_srl_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_srl_pi32
// CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(
return _mm_srl_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2su(_mm_srl_pi32((__m64)(__v2su){0x80000000, 16}, (__m64)(__v2si){1, 0}), 0x40000000, 8));
+TEST_CONSTEXPR(match_v2si(_mm_srl_pi32((__m64)(__v2si){-1, 16}, (__m64)(__v2si){32, 0}), 0, 0));
__m64 test_mm_srl_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_srl_si64
// CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(
return _mm_srl_si64(a, b);
}
+TEST_CONSTEXPR(match_v1du(_mm_srl_si64((__m64)(__v1du){0x8000000000000000ULL}, (__m64)(__v1di){1}), 0x4000000000000000ULL));
+TEST_CONSTEXPR(match_v1di(_mm_srl_si64((__m64)(__v1di){-1}, (__m64)(__v1di){64}), 0));
__m64 test_mm_srli_pi16(__m64 a) {
// CHECK-LABEL: test_mm_srli_pi16
>From bfe01c05df583876ce196d2618c0d7edaf70acfc Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Tue, 25 Nov 2025 05:16:09 -0800
Subject: [PATCH 3/5] Remove unused _anyext128 macro
---
clang/lib/Headers/mmintrin.h | 3 ---
1 file changed, 3 deletions(-)
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index c2a02b1959e1d..4829f9ec69cb8 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -54,9 +54,6 @@ typedef char __v16qi __attribute__((__vector_size__(16)));
#define __zext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, 2, 3)
-#define __anyext128(x) \
- (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
- 1, -1, -1)
/// Clears the MMX state by setting the state of the x87 stack registers
/// to empty.
>From 2a194b28b17ed9bd169b05cc8217aab8878fd849 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Wed, 26 Nov 2025 11:42:33 -0800
Subject: [PATCH 4/5] Add more tests
---
clang/test/CodeGen/X86/avx2-builtins.c | 20 ++++++++++++++++++++
clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++
clang/test/CodeGen/X86/avx512f-builtins.c | 10 +++++++++-
clang/test/CodeGen/X86/sse2-builtins.c | 20 ++++++++++++++++++++
4 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 398bf0c90105f..d6facfea8962e 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1198,6 +1198,10 @@ __m256i test_mm256_sll_epi16(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sll_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
__m256i test_mm256_sll_epi32(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_sll_epi32
@@ -1206,6 +1210,8 @@ __m256i test_mm256_sll_epi32(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14));
TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sll_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14));
__m256i test_mm256_sll_epi64(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_sll_epi64
@@ -1214,6 +1220,7 @@ __m256i test_mm256_sll_epi64(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 2, 4, 6));
TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_sll_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 1}), 0, 2, 4, 6));
__m256i test_mm256_slli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_slli_epi16
@@ -1309,6 +1316,10 @@ __m256i test_mm256_sra_epi16(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1));
TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_sra_epi16((__m256i)(__v16hi){-32768, 32767, -3, -2, -1, 0, 1, 2, -32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), -16384, 16383, -2, -1, -1, 0, 0, 1, -16384, 16383, -2, -1, -1, 0, 0, 1));
__m256i test_mm256_sra_epi32(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_sra_epi32
@@ -1317,6 +1328,8 @@ __m256i test_mm256_sra_epi32(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){1, 0, 0, 0}), -16384, 16383, -2, -1, -1, 0, 0, 1));
TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){32, 0, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){0, 1, 0, 0}), -1, 0, -1, -1, -1, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_sra_epi32((__m256i)(__v8si){-32768, 32767, -3, -2, -1, 0, 1, 2}, (__m128i)(__v4si){1, 0, 1, 1}), -16384, 16383, -2, -1, -1, 0, 0, 1));
__m256i test_mm256_srai_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_srai_epi16
@@ -1365,6 +1378,10 @@ __m256i test_mm256_srl_epi16(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7));
TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16hi(_mm256_srl_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7));
__m256i test_mm256_srl_epi32(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_srl_epi32
@@ -1373,6 +1390,8 @@ __m256i test_mm256_srl_epi32(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 0, 1, 1, 2, 2, 3, 3));
TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_srl_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 0, 1, 1, 2, 2, 3, 3));
__m256i test_mm256_srl_epi64(__m256i a, __m128i b) {
// CHECK-LABEL: test_mm256_srl_epi64
@@ -1381,6 +1400,7 @@ __m256i test_mm256_srl_epi64(__m256i a, __m128i b) {
}
TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 0}), 0, 0, 1, 1));
TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4di(_mm256_srl_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m128i)(__v2di){1, 1}), 0, 0, 1, 1));
__m256i test_mm256_srli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_srli_epi16
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 5a653cb3fa0a3..c9c30dab389db 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -2302,6 +2302,10 @@ __m512i test_mm512_sll_epi16(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62));
TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_sll_epi16((__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62));
__m512i test_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi16
@@ -2432,6 +2436,10 @@ __m512i test_mm512_sra_epi16(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31));
TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v32hi(_mm512_sra_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31));
__m512i test_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi16
@@ -2500,6 +2508,10 @@ __m512i test_mm512_srl_epi16(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0, 32767, 2, 32765, 4, 32763, 6, 32761, 8, 32759, 10, 32757, 12, 32755, 14, 32753, 16, 32751, 18, 32749, 20, 32747, 22, 32745, 24, 32743, 26, 32741, 28, 32739, 30, 32737));
TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){17, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v32hi(_mm512_srl_epi16((__m512i)(__v32hi){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30, 32, -34, 36, -38, 40, -42, 44, -46, 48, -50, 52, -54, 56, -58, 60, -62}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0, 32767, 2, 32765, 4, 32763, 6, 32761, 8, 32759, 10, 32757, 12, 32755, 14, 32753, 16, 32751, 18, 32749, 20, 32747, 22, 32745, 24, 32743, 26, 32741, 28, 32739, 30, 32737));
__m512i test_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 9b1dae2228eb4..6401a0e55a83b 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6287,6 +6287,8 @@ __m512i test_mm512_sll_epi32(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){33, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_sll_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
__m512i test_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi32
@@ -6312,6 +6314,7 @@ __m512i test_mm512_sll_epi64(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 0}), 0, 2, 4, 6, 8, 10, 12, 14));
TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){65, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_sll_epi64((__m512i)(__v8di){0, 1, 2, 3, 4, 5, 6, 7}, (__m128i)(__v2di){1, 1}), 0, 2, 4, 6, 8, 10, 12, 14));
__m512i test_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sll_epi64
@@ -6383,6 +6386,8 @@ __m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15));
TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){32, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){33, 0, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){0, 1, 0, 0}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v16si(_mm512_sra_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 1, 1}), 0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15));
__m512i test_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi32
@@ -6408,6 +6413,7 @@ __m512i test_mm512_sra_epi64(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, -1, 2, -3, 4, -5, 6, -7));
TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){64, 0}), 0, -1, 0, -1, 0, -1, 0, -1));
TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){65, 0}), 0, -1, 0, -1, 0, -1, 0, -1));
+TEST_CONSTEXPR(match_v8di(_mm512_sra_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 1}), 0, -1, 2, -3, 4, -5, 6, -7));
__m512i test_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_sra_epi64
@@ -6479,6 +6485,8 @@ __m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) {
TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 0, 0}), 0, 2147483647, 2, 2147483645, 4, 2147483643, 6, 2147483641, 8, 2147483639, 10, 2147483637, 12, 2147483635, 14, 2147483633));
TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){33, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_srl_epi32((__m512i)(__v16si){0, -2, 4, -6, 8, -10, 12, -14, 16, -18, 20, -22, 24, -26, 28, -30}, (__m128i)(__v4si){1, 0, 1, 1}), 0, 2147483647, 2, 2147483645, 4, 2147483643, 6, 2147483641, 8, 2147483639, 10, 2147483637, 12, 2147483635, 14, 2147483633));
__m512i test_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi32
@@ -6503,7 +6511,7 @@ __m512i test_mm512_srl_epi64(__m512i __A, __m128i __B) {
}
TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 0}), 0, 9223372036854775807, 2, 9223372036854775805, 4, 9223372036854775803, 6, 9223372036854775801));
TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){64, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
-TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){65, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8di(_mm512_srl_epi64((__m512i)(__v8di){0, -2, 4, -6, 8, -10, 12, -14}, (__m128i)(__v2di){1, 1}), 0, 9223372036854775807, 2, 9223372036854775805, 4, 9223372036854775803, 6, 9223372036854775801));
__m512i test_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: test_mm512_mask_srl_epi64
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index c24e8f8fdf9b2..ed1ac84b8c4a3 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1338,6 +1338,10 @@ __m128i test_mm_sll_epi16(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 2, 4, 6, 8, 10, 12, 14, 16));
TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sll_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 2, 4, 6, 8, 10, 12, 14, 16));
__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sll_epi32
@@ -1346,6 +1350,8 @@ __m128i test_mm_sll_epi32(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 2, 4, 6, 8));
TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sll_epi32((__m128i)(__v4si){1, 2, 3, 4}, (__m128i)(__v4si){1, 0, 1, 1}), 2, 4, 6, 8));
__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sll_epi64
@@ -1354,6 +1360,7 @@ __m128i test_mm_sll_epi64(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 0}), 2, 4));
TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){64, 0}), 0, 0));
+TEST_CONSTEXPR(match_v2di(_mm_sll_epi64((__m128i)(__v2di){1, 2}, (__m128i)(__v2di){1, 1}), 2, 4));
__m128i test_mm_slli_epi16(__m128i A) {
// CHECK-LABEL: test_mm_slli_epi16
@@ -1460,6 +1467,10 @@ __m128i test_mm_sra_epi16(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), -8, 8, -4, 4, -2, 2, -1, 1));
TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), -1, 0, -1, 0, -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_sra_epi16((__m128i)(__v8hi){-16, 16, -8, 8, -4, 4, -2, 2}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), -8, 8, -4, 4, -2, 2, -1, 1));
__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sra_epi32
@@ -1468,6 +1479,8 @@ __m128i test_mm_sra_epi32(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){1, 0, 0, 0}), -8, 8, -4, 4));
TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){32, 0, 0, 0}), -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){0, 1, 0, 0}), -1, 0, -1, 0));
+TEST_CONSTEXPR(match_v4si(_mm_sra_epi32((__m128i)(__v4si){-16, 16, -8, 8}, (__m128i)(__v4si){1, 0, 1, 1}), -8, 8, -4, 4));
__m128i test_mm_srai_epi16(__m128i A) {
// CHECK-LABEL: test_mm_srai_epi16
@@ -1515,6 +1528,10 @@ __m128i test_mm_srl_epi16(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v8hu(_mm_srl_epi16((__m128i)(__v8hu){0x8000, 16, 8, 4, 2, 1, 0, 0xFFFF}, (__m128i)(__v8hi){1, 0, 0, 0, 0, 0, 0, 0}), 0x4000, 8, 4, 2, 1, 0, 0, 0x7FFF));
TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){16, 0, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){0, 1, 0, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){0, 0, 1, 0, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hi(_mm_srl_epi16((__m128i)(__v8hi){-1, 16, 8, 4, 2, 1, 0, -1}, (__m128i)(__v8hi){0, 0, 0, 1, 0, 0, 0, 0}), 0, 0, 0, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v8hu(_mm_srl_epi16((__m128i)(__v8hu){0x8000, 16, 8, 4, 2, 1, 0, 0xFFFF}, (__m128i)(__v8hi){1, 0, 0, 0, 1, 1, 1, 1}), 0x4000, 8, 4, 2, 1, 0, 0, 0x7FFF));
__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_srl_epi32
@@ -1523,6 +1540,8 @@ __m128i test_mm_srl_epi32(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v4su(_mm_srl_epi32((__m128i)(__v4su){0x80000000, 16, 8, 4}, (__m128i)(__v4si){1, 0, 0, 0}), 0x40000000, 8, 4, 2));
TEST_CONSTEXPR(match_v4si(_mm_srl_epi32((__m128i)(__v4si){-1, 16, 8, 4}, (__m128i)(__v4si){32, 0, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4si(_mm_srl_epi32((__m128i)(__v4si){-1, 16, 8, 4}, (__m128i)(__v4si){0, 1, 0, 0}), 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v4su(_mm_srl_epi32((__m128i)(__v4su){0x80000000, 16, 8, 4}, (__m128i)(__v4si){1, 0, 1, 1}), 0x40000000, 8, 4, 2));
__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_srl_epi64
@@ -1531,6 +1550,7 @@ __m128i test_mm_srl_epi64(__m128i A, __m128i B) {
}
TEST_CONSTEXPR(match_v2du(_mm_srl_epi64((__m128i)(__v2du){0x8000000000000000ULL, 16}, (__m128i)(__v2di){1, 0}), 0x4000000000000000ULL, 8));
TEST_CONSTEXPR(match_v2di(_mm_srl_epi64((__m128i)(__v2di){-1, 16}, (__m128i)(__v2di){64, 0}), 0, 0));
+TEST_CONSTEXPR(match_v2du(_mm_srl_epi64((__m128i)(__v2du){0x8000000000000000ULL, 16}, (__m128i)(__v2di){1, 1}), 0x4000000000000000ULL, 8));
__m128i test_mm_srli_epi16(__m128i A) {
// CHECK-LABEL: test_mm_srli_epi16
>From e0b578d919752d268c0ad51ac832d393ced3d2d6 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Thu, 27 Nov 2025 05:46:07 -0800
Subject: [PATCH 5/5] Remove undef for __anyext128
---
clang/lib/Headers/mmintrin.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 4829f9ec69cb8..2cf46455d7915 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -1475,7 +1475,6 @@ _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}
-#undef __anyext128
#undef __trunc64
#undef __DEFAULT_FN_ATTRS_SSE2
More information about the cfe-commits
mailing list