[clang] aa491fc - [X86] Add constexpr handling for XOP/AVX512 rotate by immediate intrinsics (#156047)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Aug 30 00:35:47 PDT 2025
Author: Simon Pilgrim
Date: 2025-08-30T07:35:43Z
New Revision: aa491fceb696255372485ab53a1ec73ee3198fcf
URL: https://github.com/llvm/llvm-project/commit/aa491fceb696255372485ab53a1ec73ee3198fcf
DIFF: https://github.com/llvm/llvm-project/commit/aa491fceb696255372485ab53a1ec73ee3198fcf.diff
LOG: [X86] Add constexpr handling for XOP/AVX512 rotate by immediate intrinsics (#156047)
Added:
Modified:
clang/include/clang/Basic/BuiltinsX86.td
clang/lib/AST/ByteCode/InterpBuiltin.cpp
clang/lib/AST/ExprConstant.cpp
clang/test/CodeGen/X86/avx512f-builtins.c
clang/test/CodeGen/X86/avx512vl-builtins.c
clang/test/CodeGen/X86/xop-builtins.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4ddad63db283b..acd8f70c4a5f2 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -878,10 +878,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
- def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
- def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
- def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
- def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
def vpshlb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def vpshlw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def vpshld : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
@@ -906,6 +902,13 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vfrczpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
}
+let Features = "xop", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
+ def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
+ def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
+ def vprotqi : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant char)">;
+}
+
let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermil2pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, long long int>, _Constant char)">;
def vpermil2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, int>, _Constant char)">;
@@ -1989,21 +1992,21 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW
def reduceps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def prold128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def prolq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
def prorq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def prold256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prord256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def prolq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 1e7736955c065..e05b1a88c15bb 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3325,6 +3325,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.lshr(RHS.getZExtValue());
});
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(RHS); });
+
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, BuiltinID,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(RHS); });
+
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3ca4ac4b92ba7..b4f1e76187e25 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11835,6 +11835,62 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_vprotbi:
+ case clang::X86::BI__builtin_ia32_vprotdi:
+ case clang::X86::BI__builtin_ia32_vprotqi:
+ case clang::X86::BI__builtin_ia32_vprotwi:
+ case clang::X86::BI__builtin_ia32_prold128:
+ case clang::X86::BI__builtin_ia32_prold256:
+ case clang::X86::BI__builtin_ia32_prold512:
+ case clang::X86::BI__builtin_ia32_prolq128:
+ case clang::X86::BI__builtin_ia32_prolq256:
+ case clang::X86::BI__builtin_ia32_prolq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotl(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case clang::X86::BI__builtin_ia32_prord128:
+ case clang::X86::BI__builtin_ia32_prord256:
+ case clang::X86::BI__builtin_ia32_prord512:
+ case clang::X86::BI__builtin_ia32_prorq128:
+ case clang::X86::BI__builtin_ia32_prorq256:
+ case clang::X86::BI__builtin_ia32_prorq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APSInt RHS = SourceRHS.getInt();
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APSInt &LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ ResultElements.push_back(APValue(APSInt(LHS.rotr(RHS), DestUnsigned)));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index bddc5d11818ef..a109dad4ebdcc 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -4004,7 +4004,6 @@ __m512i test_mm512_cvtepu32_epi64(__m256i __X) {
// CHECK: zext <8 x i32> %{{.*}} to <8 x i64>
return _mm512_cvtepu32_epi64(__X);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu32_epi64(_mm256_setr_epi32(-70000, 2, -1, 0, 1, -2, 3, -4)), 4294897296, 2, 4294967295, 0, 1, 4294967294, 3, 4294967292));
__m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
@@ -4026,7 +4025,6 @@ __m512i test_mm512_cvtepu16_epi32(__m256i __A) {
// CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
return _mm512_cvtepu16_epi32(__A);
}
-
TEST_CONSTEXPR(match_v16si(_mm512_cvtepu16_epi32(_mm256_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12)), 65236, 2, 65535, 0, 1, 65534, 3, 65532, 5, 65530, 7, 65528, 9, 65526, 11, 65524));
__m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
@@ -4048,7 +4046,6 @@ __m512i test_mm512_cvtepu16_epi64(__m128i __A) {
// CHECK: zext <8 x i16> %{{.*}} to <8 x i64>
return _mm512_cvtepu16_epi64(__A);
}
-
TEST_CONSTEXPR(match_v8di(_mm512_cvtepu16_epi64(_mm_setr_epi16(-300, 2, -1, 0, 1, -2, 3, -4)), 65236, 2, 65535, 0, 1, 65534, 3, 65532));
__m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
@@ -4065,12 +4062,12 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
return _mm512_maskz_cvtepu16_epi64(__U, __A);
}
-
__m512i test_mm512_rol_epi32(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi32
// CHECK: @llvm.fshl.v16i32
return _mm512_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_rol_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 96, -97, -129, 192, -193, 256, 288, -289, 352, -353, 416, -417, 480, -481));
__m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi32
@@ -4078,6 +4075,7 @@ __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_rol_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 32, -33, 99, 99, -129, 192, -193, 99, 99, 99, 99, -353, 99, 99, 480, -481));
__m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi32
@@ -4085,12 +4083,14 @@ __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_rol_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 96, -97, 0, 0, 0, 256, 288, -289, 352, 0, 416, -417, 0, 0));
__m512i test_mm512_rol_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_rol_epi64
// CHECK: @llvm.fshl.v8i64
return _mm512_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_rol_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_rol_epi64
@@ -4098,6 +4098,7 @@ __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_rol_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_rol_epi64
@@ -4105,6 +4106,7 @@ __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_rol_epi64(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_rol_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rolv_epi32
@@ -4151,6 +4153,7 @@ __m512i test_mm512_ror_epi32(__m512i __A) {
// CHECK: @llvm.fshr.v16i32
return _mm512_ror_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_ror_epi32(((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824, 1207959552, -1207959553, 1476395008, -1476395009, 1744830464, -1744830465, 2013265920, -2013265921));
__m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi32
@@ -4158,6 +4161,7 @@ __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ror_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_ror_epi32(((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}), 0xC873, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99, 99, 99, 99, -1476395009, 99, 99, 2013265920, -2013265921));
__m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi32
@@ -4165,12 +4169,14 @@ __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_ror_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_ror_epi32(0x378C, ((__m512i)(__v16si){1, -2, 3, -4, -5, 6, -7, 8, 9, -10, 11, -12, 13, -14, 15, -16}), 5), 0, 0, 402653184, -402653185, 0, 0, 0, 1073741824, 1207959552, -1207959553, 1476395008, 0, 1744830464, -1744830465, 0, 0));
__m512i test_mm512_ror_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_ror_epi64
// CHECK: @llvm.fshr.v8i64
return _mm512_ror_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_ror_epi64(((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, -1729382256910270465LL, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 4611686018427387904LL));
__m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_ror_epi64
@@ -4178,6 +4184,7 @@ __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_ror_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_ror_epi64(((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 99, 99, -2305843009213693953LL, 3458764513820540928LL, -3458764513820540929LL, 99));
__m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_ror_epi64
@@ -4185,7 +4192,7 @@ __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_ror_epi64(__U, __A, 5);
}
-
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_ror_epi64(0x37, ((__m512i)(__v8di){1, -2, 3, -4, -5, 6, -7, 8}), 5), 576460752303423488LL, -576460752303423489LL, 1729382256910270464LL, 0, -2305843009213693953LL, 3458764513820540928LL, 0, 0));
__m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_rorv_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 477326f35518a..78e01b944ec5d 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -5666,6 +5666,7 @@ __m128i test_mm_rol_epi32(__m128i __A) {
// CHECK: @llvm.fshl.v4i32
return _mm_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_rol_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97));
__m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi32
@@ -5673,6 +5674,7 @@ __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_rol_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 99, 99));
__m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi32
@@ -5680,12 +5682,14 @@ __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_maskz_rol_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, 0, 0, -97));
__m256i test_mm256_rol_epi32(__m256i __A) {
// CHECK-LABEL: test_mm256_rol_epi32
// CHECK: @llvm.fshl.v8i32
return _mm256_rol_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_rol_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, -97, -129, 192, -193, 256));
__m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_rol_epi32
@@ -5693,6 +5697,7 @@ __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_rol_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_rol_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 99, 99, -129, 192, -193, 99));
__m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_rol_epi32
@@ -5700,12 +5705,14 @@ __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_rol_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_rol_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 32, -33, 96, 0, -129, 192, 0, 0));
__m128i test_mm_rol_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_rol_epi64
// CHECK: @llvm.fshl.v2i64
return _mm_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_rol_epi64(((__m128i)(__v2di){10, -11}), 19), 5242880, -5242881));
__m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_rol_epi64
@@ -5713,6 +5720,7 @@ __m128i test_mm_mask_rol_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_rol_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 5242880, 99));
__m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_rol_epi64
@@ -5720,12 +5728,14 @@ __m128i test_mm_maskz_rol_epi64(__mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_rol_epi64(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_maskz_rol_epi64(0x2, ((__m128i)(__v2di){10, -11}), 19), 0, -5242881));
__m256i test_mm256_rol_epi64(__m256i __A) {
// CHECK-LABEL: test_mm256_rol_epi64
// CHECK: @llvm.fshl.v4i64
return _mm256_rol_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v4di(_mm256_rol_epi64(((__m256i)(__v4di){10, -11, -12, 13}), 19), 5242880, -5242881, -5767169, 6815744));
__m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_rol_epi64
@@ -5733,6 +5743,7 @@ __m256i test_mm256_mask_rol_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_rol_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_rol_epi64(((__m256i)(__v4di){99, 99, 99, 99}), 0x9, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 5242880, 99, 99, 6815744));
__m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_rol_epi64
@@ -5740,6 +5751,7 @@ __m256i test_mm256_maskz_rol_epi64(__mmask8 __U, __m256i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_rol_epi64(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_rol_epi64(0xC, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 0, 0, -5767169, 6815744));
__m128i test_mm_rolv_epi32(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_rolv_epi32
@@ -5826,6 +5838,7 @@ __m128i test_mm_ror_epi32(__m128i __A) {
// CHECK: @llvm.fshr.v4i32
return _mm_ror_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_ror_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, -134217729, 402653184, -402653185));
__m128i test_mm_mask_ror_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_ror_epi32
@@ -5833,6 +5846,7 @@ __m128i test_mm_mask_ror_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_ror_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_ror_epi32(((__m128i)(__v4si){99, 99, 99, 99}), 0x3, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, -134217729, 99, 99));
__m128i test_mm_maskz_ror_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_ror_epi32
@@ -5840,12 +5854,14 @@ __m128i test_mm_maskz_ror_epi32(__mmask8 __U, __m128i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_ror_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v4si(_mm_maskz_ror_epi32(0x9, ((__m128i)(__v4si){1, -2, 3, -4}), 5), 134217728, 0, 0, -402653185));
__m256i test_mm256_ror_epi32(__m256i __A) {
// CHECK-LABEL: test_mm256_ror_epi32
// CHECK: @llvm.fshr.v8i32
return _mm256_ror_epi32(__A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_ror_epi32(((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 402653184, -402653185, -536870913, 805306368, -805306369, 1073741824));
__m256i test_mm256_mask_ror_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_ror_epi32
@@ -5853,6 +5869,7 @@ __m256i test_mm256_mask_ror_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_ror_epi32(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_ror_epi32(((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}), 0x73, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 99, 99, -536870913, 805306368, -805306369, 99));
__m256i test_mm256_maskz_ror_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_ror_epi32
@@ -5860,12 +5877,14 @@ __m256i test_mm256_maskz_ror_epi32(__mmask8 __U, __m256i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_ror_epi32(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_ror_epi32(0x37, ((__m256i)(__v8si){1, -2, 3, -4, -5, 6, -7, 8}), 5), 134217728, -134217729, 402653184, 0, -536870913, 805306368, 0, 0));
__m128i test_mm_ror_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_ror_epi64
// CHECK: @llvm.fshr.v2i64
return _mm_ror_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_ror_epi64(((__m128i)(__v2di){10, -11}), 19), 351843720888320LL, -351843720888321LL));
__m128i test_mm_mask_ror_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_ror_epi64
@@ -5873,6 +5892,7 @@ __m128i test_mm_mask_ror_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_ror_epi64(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_ror_epi64(((__m128i)(__v2di){99, 99}), 0x1, ((__m128i)(__v2di){10, -11}), 19), 351843720888320LL, 99));
__m128i test_mm_maskz_ror_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_ror_epi64
@@ -5880,12 +5900,14 @@ __m128i test_mm_maskz_ror_epi64(__mmask8 __U, __m128i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_ror_epi64(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v2di(_mm_maskz_ror_epi64(0x2, ((__m128i)(__v2di){10, -11}), 19), 0, -351843720888321LL));
__m256i test_mm256_ror_epi64(__m256i __A) {
// CHECK-LABEL: test_mm256_ror_epi64
// CHECK: @llvm.fshr.v4i64
return _mm256_ror_epi64(__A, 5);
}
+TEST_CONSTEXPR(match_v4di(_mm256_ror_epi64(((__m256i)(__v4di){10, -11, -12, 13}), 19), 351843720888320LL, -351843720888321LL, -387028092977153LL, 457396837154816LL));
__m256i test_mm256_mask_ror_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_ror_epi64
@@ -5893,6 +5915,7 @@ __m256i test_mm256_mask_ror_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_ror_epi64(__W, __U, __A,5);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_ror_epi64(((__m256i)(__v4di){99, 99, 99, 99}), 0x9, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 351843720888320LL, 99, 99, 457396837154816LL));
__m256i test_mm256_maskz_ror_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_ror_epi64
@@ -5900,7 +5923,7 @@ __m256i test_mm256_maskz_ror_epi64(__mmask8 __U, __m256i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_ror_epi64(__U, __A, 5);
}
-
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_ror_epi64(0xC, ((__m256i)(__v4di){10, -11, -12, 13}), 19), 0, 0, -387028092977153LL, 457396837154816LL));
__m128i test_mm_rorv_epi32(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_rorv_epi32
diff --git a/clang/test/CodeGen/X86/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c
index cd403d5876fa3..994fc7b3e136a 100644
--- a/clang/test/CodeGen/X86/xop-builtins.c
+++ b/clang/test/CodeGen/X86/xop-builtins.c
@@ -239,24 +239,28 @@ __m128i test_mm_roti_epi8(__m128i a) {
// CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1))
return _mm_roti_epi8(a, 1);
}
+TEST_CONSTEXPR(match_v16qi(_mm_roti_epi8(((__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 3), 0, 8, -9, 24, -25, 40, -41, 56, -57, 72, -73, 88, -89, 104, -105, 120));
__m128i test_mm_roti_epi16(__m128i a) {
// CHECK-LABEL: test_mm_roti_epi16
// CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50))
return _mm_roti_epi16(a, 50);
}
+TEST_CONSTEXPR(match_v8hi(_mm_roti_epi16(((__m128i)(__v8hi){2, -3, 4, -5, 6, -7, 8, -9}), 1), 4, -5, 8, -9, 12, -13, 16, -17));
__m128i test_mm_roti_epi32(__m128i a) {
// CHECK-LABEL: test_mm_roti_epi32
// CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226))
return _mm_roti_epi32(a, -30);
}
+TEST_CONSTEXPR(match_v4si(_mm_roti_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97));
__m128i test_mm_roti_epi64(__m128i a) {
// CHECK-LABEL: test_mm_roti_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100))
return _mm_roti_epi64(a, 100);
}
+TEST_CONSTEXPR(match_v2di(_mm_roti_epi64(((__m128i)(__v2di){99, -55}), 19), 51904512, -28311553));
__m128i test_mm_shl_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_shl_epi8
More information about the cfe-commits
mailing list