[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow PSLL/PSRA/PSRL var intrinsics to be used in constexpr (PR #169276)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Nov 23 21:09:25 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: NagaChaitanya Vellanki (chaitanyav)
<details>
<summary>Changes</summary>
Resolves: #169176
---
Patch is 88.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169276.diff
13 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.td (+24-36)
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+105)
- (modified) clang/lib/AST/ExprConstant.cpp (+106)
- (modified) clang/lib/Headers/avx2intrin.h (+16-24)
- (modified) clang/lib/Headers/avx512bwintrin.h (+18-27)
- (modified) clang/lib/Headers/avx512fintrin.h (+36-54)
- (modified) clang/lib/Headers/avx512vlintrin.h (+52-78)
- (modified) clang/lib/Headers/emmintrin.h (+16-16)
- (modified) clang/test/CodeGen/X86/avx2-builtins.c (+34)
- (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+21-6)
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+42-12)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+34-7)
- (modified) clang/test/CodeGen/X86/sse2-builtins.c (+16)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4aa3d51931980..253eb3cbd7ee9 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -214,17 +214,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
def _mm_pause : X86LibBuiltin<"void()">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
@@ -265,6 +254,15 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
+
+ def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
+ def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}
let Features = "sse3", Attributes = [NoThrow] in {
@@ -585,14 +583,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psadbw256
: X86Builtin<
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
- def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
- def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
- def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
- def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
@@ -669,6 +659,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+
+ def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
+ def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
+ def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
+ def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1930,16 +1929,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
-}
-
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
+ def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
@@ -1995,7 +1991,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psraw512
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psrlw512
@@ -2312,25 +2308,17 @@ let Features = "avx512f",
def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl",
- Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
}
-let Features = "avx512vl",
- Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 83e40f64fd979..1b1866034b50d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3468,6 +3468,69 @@ static bool interp__builtin_ia32_shuffle_generic(
return true;
}
+static bool interp__builtin_ia32_shift_with_count(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
+ llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
+
+ assert(Call->getNumArgs() == 2);
+
+ const Pointer &Count = S.Stk.pop<Pointer>();
+ const Pointer &Source = S.Stk.pop<Pointer>();
+
+ QualType SourceType = Call->getArg(0)->getType();
+ QualType CountType = Call->getArg(1)->getType();
+ assert(SourceType->isVectorType() && CountType->isVectorType());
+
+ const auto *SourceVecT = SourceType->castAs<VectorType>();
+ const auto *CountVecT = CountType->castAs<VectorType>();
+ PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType());
+ PrimType CountElemT = *S.getContext().classify(CountVecT->getElementType());
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ unsigned DestEltWidth =
+ S.getASTContext().getTypeSize(SourceVecT->getElementType());
+ bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
+ unsigned DestLen = SourceVecT->getNumElements();
+ unsigned CountEltWidth =
+ S.getASTContext().getTypeSize(CountVecT->getElementType());
+ unsigned NumBitsInQWord = 64;
+ unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
+
+ uint64_t CountLQWord = 0;
+ for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
+ uint64_t Elt = 0;
+ INT_TYPE_SWITCH(CountElemT,
+ { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); });
+ CountLQWord |= (Elt << (EltIdx * CountEltWidth));
+ }
+
+ for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
+ APSInt Elt;
+ INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });
+
+ APInt Result;
+ if (CountLQWord < DestEltWidth) {
+ Result = ShiftOp(Elt, CountLQWord);
+ } else {
+ Result = OverflowOp(Elt, DestEltWidth);
+ }
+ if (IsDestUnsigned) {
+ INT_TYPE_SWITCH(SourceElemT, {
+ Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
+ });
+ } else {
+ INT_TYPE_SWITCH(SourceElemT, {
+ Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
+ });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
@@ -4826,6 +4889,48 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
+ case X86::BI__builtin_ia32_psraq128:
+ case X86::BI__builtin_ia32_psraq256:
+ case X86::BI__builtin_ia32_psraq512:
+ case X86::BI__builtin_ia32_psrad128:
+ case X86::BI__builtin_ia32_psrad256:
+ case X86::BI__builtin_ia32_psrad512:
+ case X86::BI__builtin_ia32_psraw128:
+ case X86::BI__builtin_ia32_psraw256:
+ case X86::BI__builtin_ia32_psraw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
+ [](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); });
+
+ case X86::BI__builtin_ia32_psllq128:
+ case X86::BI__builtin_ia32_psllq256:
+ case X86::BI__builtin_ia32_psllq512:
+ case X86::BI__builtin_ia32_pslld128:
+ case X86::BI__builtin_ia32_pslld256:
+ case X86::BI__builtin_ia32_pslld512:
+ case X86::BI__builtin_ia32_psllw128:
+ case X86::BI__builtin_ia32_psllw256:
+ case X86::BI__builtin_ia32_psllw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
+ [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });
+
+ case X86::BI__builtin_ia32_psrlq128:
+ case X86::BI__builtin_ia32_psrlq256:
+ case X86::BI__builtin_ia32_psrlq512:
+ case X86::BI__builtin_ia32_psrld128:
+ case X86::BI__builtin_ia32_psrld256:
+ case X86::BI__builtin_ia32_psrld512:
+ case X86::BI__builtin_ia32_psrlw128:
+ case X86::BI__builtin_ia32_psrlw256:
+ case X86::BI__builtin_ia32_psrlw512:
+ return interp__builtin_ia32_shift_with_count(
+ S, OpPC, Call,
+ [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
+ [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..7e86f1252a23d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12166,6 +12166,52 @@ static bool evalShuffleGeneric(
return true;
}
+static bool evalShiftWithCount(
+ EvalInfo &Info, const CallExpr *Call, APValue &Out,
+ llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
+ llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
+
+ APValue Source, Count;
+ if (!EvaluateAsRValue(Info, Call->getArg(0), Source) ||
+ !EvaluateAsRValue(Info, Call->getArg(1), Count))
+ return false;
+
+ assert(Call->getNumArgs() == 2);
+
+ QualType SourceTy = Call->getArg(0)->getType();
+ QualType CountTy = Call->getArg(1)->getType();
+ assert(SourceTy->isVectorType() && CountTy->isVectorType());
+
+ QualType DestEltTy = SourceTy->castAs<VectorType>()->getElementType();
+ unsigned DestEltWidth = Source.getVectorElt(0).getInt().getBitWidth();
+ unsigned DestLen = Source.getVectorLength();
+ bool IsDestUnsigned = DestEltTy->isUnsignedIntegerType();
+ unsigned CountEltWidth = Count.getVectorElt(0).getInt().getBitWidth();
+ unsigned NumBitsInQWord = 64;
+ unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
+ SmallVector<APValue, 64> Result;
+ Result.reserve(DestLen);
+
+ uint64_t CountLQWord = 0;
+ for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
+ uint64_t Elt = Count.getVectorElt(EltIdx).getInt().getZExtValue();
+ CountLQWord |= (Elt << (EltIdx * CountEltWidth));
+ }
+
+ for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
+ APInt Elt = Source.getVectorElt(EltIdx).getInt();
+ if (CountLQWord < DestEltWidth) {
+ Result.push_back(
+ APValue(APSInt(ShiftOp(Elt, CountLQWord), IsDestUnsigned)));
+ } else {
+ Result.push_back(
+ APValue(APSInt(OverflowOp(Elt, DestEltWidth), IsDestUnsigned)));
+ }
+ }
+ Out = APValue(Result.data(), Result.size());
+ return true;
+}
+
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -13130,6 +13176,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(Result.data(), Result.size()), E);
}
+ case X86::BI__builtin_ia32_psraq128:
+ case X86::BI__builtin_ia32_psraq256:
+ case X86::BI__builtin_ia32_psraq512:
+ case X86::BI__builtin_ia32_psrad128:
+ case X86::BI__builtin_ia32_psrad256:
+ case X86::BI__builtin_ia32_psrad512:
+ case X86::BI__builtin_ia32_psraw128:
+ case X86::BI__builtin_ia32_psraw256:
+ case X86::BI__builtin_ia32_psraw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return Elt.ashr(Width - 1);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_psllq128:
+ case X86::BI__builtin_ia32_psllq256:
+ case X86::BI__builtin_ia32_psllq512:
+ case X86::BI__builtin_ia32_pslld128:
+ case X86::BI__builtin_ia32_pslld256:
+ case X86::BI__builtin_ia32_pslld512:
+ case X86::BI__builtin_ia32_psllw128:
+ case X86::BI__builtin_ia32_psllw256:
+ case X86::BI__builtin_ia32_psllw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return APInt::getZero(Width);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_psrlq128:
+ case X86::BI__builtin_ia32_psrlq256:
+ case X86::BI__builtin_ia32_psrlq512:
+ case X86::BI__builtin_ia32_psrld128:
+ case X86::BI__builtin_ia32_psrld256:
+ case X86::BI__builtin_ia32_psrld512:
+ case X86::BI__builtin_ia32_psrlw128:
+ case X86::BI__builtin_ia32_psrlw256:
+ case X86::BI__builtin_ia32_psrlw512: {
+ APValue R;
+ if (!evalShiftWithCount(
+ Info, E, R,
+ [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
+ [](const APInt &Elt, unsigned Width) {
+ return APInt::getZero(Width);
+ }))
+ return false;
+ return Success(R, E);
+ }
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3e3c13d8bd662..d3ceb2327ac62 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -2095,9 +2095,8 @@ _mm256_slli_epi16(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2134,9 +2133,8 @@ _mm256_slli_epi32(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
}
@@ -2173,9 +2171,8 @@ _mm256_slli_epi64(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi64(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi64(__m256i __a, __m128i __count) {
return __builtin_ia32_psllq256((__v4di)__a, __count);
}
@@ -2214,9 +2211,8 @@ _mm256_srai_epi16(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2255,9 +2251,8 @@ _mm256_srai_epi32(__m256i __a, int __count) {
/// A 128-bit vector of [2 x i64] whose lower element gives the unsigned
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169276
More information about the cfe-commits mailing list