[clang] [X86] Add constexpr handling for XOP/AVX512 rotate by immediate intrinsics (PR #156047)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 29 08:31:36 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
---
Patch is 28.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156047.diff
6 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.td (+10-7)
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+26)
- (modified) clang/lib/AST/ExprConstant.cpp (+56)
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+12-5)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+24-1)
- (modified) clang/test/CodeGen/X86/xop-builtins.c (+4)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index dfff92839da4e..3cac91cf80f5d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -934,10 +934,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
- def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
- def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
- def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
- def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
@@ -962,6 +958,13 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
}
+let Features = "xop", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
+ def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
+ def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
+ def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
+}
+
let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">;
def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">;
@@ -2045,21 +2048,21 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW
def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 1e7736955c065..c34d768f96211 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3325,6 +3325,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.lshr(RHS.getZExtValue());
});
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.rotl(RHS);
+ });
+
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.rotr(RHS);
+ });
+
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3ca4ac4b92ba7..b4f1e76187e25 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11835,6 +11835,62 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotl(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotr(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index bddc5d11818ef..a109dad4ebdcc 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -4004,7 +4004,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) {
// CHECK: zext <8 x i32> %{{.*}} to <8 x i64>
return _mm512_cvtepu32_epi64(__X);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292));
__m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
@@ -4026,7 +4025,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) {
// CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
return _mm512_cvtepu16_epi32(__A);
}
-
TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524));
__m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
@@ -4048,7 +4046,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) {
// CHECK: zext <8 x i16> %{{.*}} to <8 x i64>
return _mm512_cvtepu16_epi64(__A);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
__m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
@@ -4065,12 +4062,12 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
return _mm512_maskz_cvtepu16_epi64(__U, __A);
}
-
__m512i test_mm512_rol_epi32(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi32
// CHECK: @llvm.fshl.v16i32
return _mm512_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481));
__m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi32
@@ -4078,6 +4075,7 @@ __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481));
__m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi32
@@ -4085,12 +4083,14 @@ __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0));
__m512i test_mm512_rol_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi64
// CHECK: @llvm.fshl.v8i64
return _mm512_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi64
@@ -4098,6 +4098,7 @@ __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi64
@@ -4105,6 +4106,7 @@ __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_rol_epi64(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rolv_epi32
@@ -4151,6 +4153,7 @@ __m512i test_mm512_ror_epi32(__m512i __A) {
// CHECK: @llvm.fshr.v16i32
return _mm512_ror_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 1744830464, -1744830465, 2013265920, -2013265921));
__m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi32
@@ -4158,6 +4161,7 @@ __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ror_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921));
__m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi32
@@ -4165,12 +4169,14 @@ __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_ror_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0));
__m512i test_mm512_ror_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_ror_epi64
// CHECK: @llvm.fshr.v8i64
return _mm512_ror_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL));
__m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi64
@@ -4178,6 +4184,7 @@ __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_ror_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99));
__m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi64
@@ -4185,7 +4192,7 @@ __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_ror_epi64(__U, __A, 5);
}
-
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0));
__m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rorv_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 477326f35518a..78e01b944ec5d 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -5666,6 +5666,7 @@ __m128i test_mm_rol_epi32(__m128i __A) {
// CHECK: @llvm.fshl.v4i32
return _mm_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_rol_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97));
__m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi32
@@ -5673,6 +5674,7 @@ __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_rol_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 99, 99));
__m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi32
@@ -5680,12 +5682,14 @@ __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_maskz_rol_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, 0, 0, -97));
__m256i test_mm256_rol_epi32(__m256i __A) {
// CHECK-LABEL: test_mm256_rol_epi32
// CHECK: @llvm.fshl.v8i32
return _mm256_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_rol_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_rol_epi32
@@ -5693,6 +5697,7 @@ __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_rol_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_rol_epi32
@@ -5700,12 +5705,14 @@ __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_rol_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m128i test_mm_rol_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_rol_epi64
// CHECK: @llvm.fshl.v2i64
return _mm_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_rol_epi64(((__m128i)(__v2di){10, -11}), 19), 5242880, -5242881));
__m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi64
@@ -5713,6 +5720,7 @@ __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_rol_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 5242880, 99));
__m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi64
@@ -5720,12 +5728,14 @@ __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_rol_epi64(__U, __...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/156047
More information about the cfe-commits
mailing list