[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX/AVX512 subvector extraction intrinsics to be used in constexpr #157712 (PR #162836)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 16 19:40:10 PDT 2025
https://github.com/SeongjaeP updated https://github.com/llvm/llvm-project/pull/162836
>From 3128a37fa669b00235b06f577dd5bf8354b2265e Mon Sep 17 00:00:00 2001
From: SeongjaeP <psjj960507 at gmail.com>
Date: Thu, 9 Oct 2025 14:25:24 +0900
Subject: [PATCH 01/11] Apply style fixes and rebase onto upstream
---
clang/lib/AST/ExprConstant.cpp | 75 ++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 35a866ea5010f..5ac7837aa0fe3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11769,6 +11769,81 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256: {
+ APValue SourceVec, SourceImm;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceImm))
+ return false;
+
+ if (!SourceVec.isVector())
+ return false;
+
+ const auto *RetVT = E->getType()->castAs<VectorType>();
+ if (!RetVT) return false;
+
+ unsigned RetLen = RetVT->getNumElements();
+ unsigned SrcLen = SourceVec.getVectorLength();
+ if (SrcLen != RetLen * 2)
+ return false;
+
+ unsigned Idx = SourceImm.getInt().getZExtValue() & 1;
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(RetLen);
+
+ for (unsigned I = 0; I < RetLen; I++)
+ ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I));
+
+ return Success(APValue(ResultElements.data(), RetLen), E);
+ }
+
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extractf64x4_mask:{
+ APValue SourceVec, MergeVec;
+ APSInt Imm, MaskImm;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateInteger(E->getArg(1), Imm, Info) ||
+ !EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
+ !EvaluateInteger(E->getArg(3), MaskImm, Info))
+ return false;
+
+ const auto *RetVT = E->getType()->castAs<VectorType>();
+ unsigned RetLen = RetVT->getNumElements();
+
+ if (!SourceVec.isVector() || !MergeVec.isVector()) return false;
+ unsigned SrcLen = SourceVec.getVectorLength();
+ if (!SrcLen || !RetLen || (SrcLen % RetLen) != 0) return false;
+
+ unsigned Lanes = SrcLen / RetLen;
+ unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes);
+ unsigned Base = Lane * RetLen;
+ uint64_t Mask = MaskImm.getZExtValue();
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(RetLen);
+ for (unsigned I = 0; I < RetLen; ++I) {
+ if ((Mask >> I) & 1)
+ ResultElements.push_back(SourceVec.getVectorElt(Base + I));
+ else
+ ResultElements.push_back(MergeVec.getVectorElt(I));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
>From d6b83cc0817fb596393f8f72f0f8bd6d99669f4e Mon Sep 17 00:00:00 2001
From: SeongjaeP <psjj960507 at gmail.com>
Date: Fri, 26 Sep 2025 11:20:04 +0900
Subject: [PATCH 02/11] Refactoring and Test Pass
---
clang/test/CodeGen/X86/avx2-builtins.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 55f18f947b96f..de33b72995f5c 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -466,6 +466,8 @@ __m128i test0_mm256_extracti128_si256_0(__m256i a) {
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
return _mm256_extracti128_si256(a, 0);
}
+TEST_CONSTEXPR(match_m128i(_mm256_extracti128_si256(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), 0),
+ 1ULL, 2ULL));
__m128i test1_mm256_extracti128_si256_1(__m256i a) {
// CHECK-LABEL: test1_mm256_extracti128_si256
>From 7525eecb29aaa916d0192a34ced9b3b6b3226458 Mon Sep 17 00:00:00 2001
From: seongjaep <psjj960507 at gmail.com>
Date: Fri, 10 Oct 2025 20:55:43 +0900
Subject: [PATCH 03/11] Add Constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 217589d7add1d..b3b4d5e076fd8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -471,7 +471,7 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
@@ -576,7 +576,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -1065,7 +1065,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf64x4_mask : X86Builtin<"_Vector<4, double>(_Vector<8, double>, _Constant int, _Vector<4, double>, unsigned char)">;
def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">;
}
@@ -2944,24 +2944,24 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def pmovqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">;
}
-let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf32x8_mask : X86Builtin<"_Vector<8, float>(_Vector<16, float>, _Constant int, _Vector<8, float>, unsigned char)">;
def extractf64x2_512_mask : X86Builtin<"_Vector<2, double>(_Vector<8, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti32x8_mask : X86Builtin<"_Vector<8, int>(_Vector<16, int>, _Constant int, _Vector<8, int>, unsigned char)">;
def extracti64x2_512_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extracti32x4_mask : X86Builtin<"_Vector<4, int>(_Vector<16, int>, _Constant int, _Vector<4, int>, unsigned char)">;
def extracti64x4_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, long long int>, _Constant int, _Vector<4, long long int>, unsigned char)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf64x2_256_mask : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti64x2_256_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf32x4_256_mask : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int, _Vector<4, float>, unsigned char)">;
def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">;
}
>From 3a468d7d64a69273a1232ddc77727aaa386a9cef Mon Sep 17 00:00:00 2001
From: seongjaep <psjj960507 at gmail.com>
Date: Fri, 10 Oct 2025 21:01:28 +0900
Subject: [PATCH 04/11] [Clang] Rebase AVX feature branch onto main
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 105 +++++++++++++++++++++++
1 file changed, 105 insertions(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 922d67940e22f..d844f93d1b983 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2819,6 +2819,92 @@ static bool interp__builtin_elementwise_triop(
return true;
}
+static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call,
+ unsigned ID) {
+ assert(Call->getNumArgs() == 2);
+
+ APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+ uint64_t Index = ImmAPS.getZExtValue();
+
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ if (!Src.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ if (!Dst.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ unsigned SrcElems = Src.getNumElems();
+ unsigned DstElems = Dst.getNumElems();
+
+ if (SrcElems == 0 || DstElems == 0 || (SrcElems % DstElems) != 0)
+ return false;
+
+ unsigned NumLanes = SrcElems / DstElems;
+ unsigned Lane = static_cast<unsigned>(Index % NumLanes);
+ unsigned ExtractPos = Lane * DstElems;
+
+ PrimType ElemT = Src.getFieldDesc()->getPrimType();
+ if (ElemT != Dst.getFieldDesc()->getPrimType())
+ return false;
+
+ TYPE_SWITCH(ElemT, {
+ for (unsigned I = 0; I != DstElems; ++I) {
+ Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
+ }
+ });
+
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call,
+ unsigned ID) {
+ assert(Call->getNumArgs() == 4);
+
+ APSInt MaskAPS = popToAPSInt(S, Call->getArg(3));
+ const Pointer &Merge = S.Stk.pop<Pointer>();
+ APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+
+ if (!Src.getFieldDesc()->isPrimitiveArray() || !Merge.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ if (!Dst.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ unsigned SrcElems = Src.getNumElems();
+ unsigned DstElems = Dst.getNumElems();
+ if (!SrcElems || !DstElems || (SrcElems % DstElems) != 0)
+ return false;
+
+ PrimType ElemT = Src.getFieldDesc()->getPrimType();
+ if (ElemT != Dst.getFieldDesc()->getPrimType() ||
+ ElemT != Merge.getFieldDesc()->getPrimType())
+ return false;
+
+ unsigned NumLanes = SrcElems / DstElems;
+ unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
+ unsigned Base = Lane * DstElems;
+
+ uint64_t Mask = MaskAPS.getZExtValue();
+
+ TYPE_SWITCH(ElemT, {
+ for (unsigned I = 0; I != DstElems; ++I) {
+ if ((Mask >> I) & 1)
+ Dst.elem<T>(I) = Src.elem<T>(Base + I);
+ else
+ Dst.elem<T>(I) = Merge.elem<T>(I);
+ }
+ });
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
@@ -3451,6 +3537,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID);
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
>From e73b88c8a450a27d4d3300cf4c342593d6915799 Mon Sep 17 00:00:00 2001
From: seongjaep <psjj960507 at gmail.com>
Date: Fri, 10 Oct 2025 21:02:57 +0900
Subject: [PATCH 05/11] [Clang] Refactor: Use setzero for explicit
initialization
---
clang/lib/Headers/avx512dqintrin.h | 8 ++++----
clang/lib/Headers/avx512fintrin.h | 8 ++++----
clang/lib/Headers/avx512vldqintrin.h | 4 ++--
clang/lib/Headers/avx512vlintrin.h | 4 ++--
4 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index fb65bf933b8ad..953285d6ab414 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1214,7 +1214,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extractf32x8_ps(A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm256_undefined_ps(), \
+ (__v8sf)_mm_setzero_pd(), \
(__mmask8)-1))
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
@@ -1230,7 +1230,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
(int)(imm), \
- (__v2df)_mm_undefined_pd(), \
+ (__v2df)_mm_setzero_pd(), \
(__mmask8)-1))
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
@@ -1247,7 +1247,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extracti32x8_epi32(A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_undefined_si256(), \
+ (__v8si)_mm256_setzero_si256(), \
(__mmask8)-1))
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
@@ -1263,7 +1263,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
(int)(imm), \
- (__v2di)_mm_undefined_si128(), \
+ (__v2di)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 80e58425cdd71..2768a5bae887d 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3166,7 +3166,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
#define _mm512_extractf64x4_pd(A, I) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
- (__v4df)_mm256_undefined_pd(), \
+ (__v4df)_mm256_setzero_pd(), \
(__mmask8)-1))
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
@@ -3181,7 +3181,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
#define _mm512_extractf32x4_ps(A, I) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v4sf)_mm_undefined_ps(), \
+ (__v4sf)_mm_setzero_ps(), \
(__mmask8)-1))
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
@@ -7107,7 +7107,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
#define _mm512_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_undefined_si128(), \
+ (__v4si)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
@@ -7122,7 +7122,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
#define _mm512_extracti64x4_epi64(A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_undefined_si256(), \
+ (__v4di)_mm256_setzero_si256(), \
(__mmask8)-1))
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 68bd52e43981a..2d3c4b551e3b0 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -1075,7 +1075,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
#define _mm256_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
(int)(imm), \
- (__v2df)_mm_undefined_pd(), \
+ (__v2df)_mm_setzero_pd(), \
(__mmask8)-1))
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
@@ -1093,7 +1093,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
#define _mm256_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
(int)(imm), \
- (__v2di)_mm_undefined_si128(), \
+ (__v2di)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 965741f0ff944..252fb111988b0 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7609,7 +7609,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
#define _mm256_extractf32x4_ps(A, imm) \
((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
(int)(imm), \
- (__v4sf)_mm_undefined_ps(), \
+ (__v4sf)_mm_setzero_ps(), \
(__mmask8)-1))
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
@@ -7627,7 +7627,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
#define _mm256_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
(int)(imm), \
- (__v4si)_mm_undefined_si128(), \
+ (__v4si)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
>From 5b7988f3fa4a82e0b6a3b77f3bb0a5919549129d Mon Sep 17 00:00:00 2001
From: seongjaep <psjj960507 at gmail.com>
Date: Fri, 10 Oct 2025 21:05:28 +0900
Subject: [PATCH 06/11] [Clang] Add TEST_CONSTEXPR macro for testing
---
clang/test/CodeGen/X86/avx-builtins.c | 8 +++++++-
clang/test/CodeGen/X86/avx2-builtins.c | 3 +--
clang/test/CodeGen/X86/avx512dq-builtins.c | 12 ++++++++++++
clang/test/CodeGen/X86/avx512f-builtins.c | 13 ++++++++++++-
clang/test/CodeGen/X86/avx512vl-builtins.c | 6 ++++++
clang/test/CodeGen/X86/avx512vldq-builtins.c | 6 ++++++
6 files changed, 44 insertions(+), 4 deletions(-)
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 5f08b6be81ab7..11ed8498b8ecd 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1070,19 +1070,25 @@ __m128d test_mm256_extractf128_pd(__m256d A) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
return _mm256_extractf128_pd(A, 1);
}
+TEST_CONSTEXPR(match_m128d(_mm256_extractf128_pd(((__m256d){0.0, 1.0, 2.0, 3.0}), 1),
+ 2.0, 3.0));
__m128 test_mm256_extractf128_ps(__m256 A) {
// CHECK-LABEL: test_mm256_extractf128_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf128_ps(A, 1);
}
+TEST_CONSTEXPR(match_m128(_mm256_extractf128_ps(((__m256){0,1,2,3,4,5,6,7}), 1),
+ 4.0f, 5.0f, 6.0f, 7.0f));
__m128i test_mm256_extractf128_si256(__m256i A) {
// CHECK-LABEL: test_mm256_extractf128_si256
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf128_si256(A, 1);
}
-
+TEST_CONSTEXPR(match_m128i(_mm256_extractf128_si256(((__m256i){0ULL, 1ULL, 2ULL, 3ULL}), 1),
+ 2ULL, 3ULL));
+
__m256d test_mm256_floor_pd(__m256d x) {
// CHECK-LABEL: test_mm256_floor_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1)
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index de33b72995f5c..b2e10e11128dc 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -466,8 +466,7 @@ __m128i test0_mm256_extracti128_si256_0(__m256i a) {
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
return _mm256_extracti128_si256(a, 0);
}
-TEST_CONSTEXPR(match_m128i(_mm256_extracti128_si256(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), 0),
- 1ULL, 2ULL));
+TEST_CONSTEXPR(match_m128i(_mm256_extracti128_si256(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), 0),1ULL, 2ULL));
__m128i test1_mm256_extracti128_si256_1(__m256i a) {
// CHECK-LABEL: test1_mm256_extracti128_si256
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 4112561216af8..4122a27a7542e 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1402,6 +1402,7 @@ __m256 test_mm512_extractf32x8_ps(__m512 __A) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extractf32x8_ps(__A, 1);
}
+TEST_CONSTEXPR(match_m256(_mm512_extractf32x8_ps(((__m512){0.0f,1.0f,2.0f,3.0f, 4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f, 12.0f,13.0f,14.0f,15.0f}), 1),8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f));
__m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_mask_extractf32x8_ps
@@ -1409,6 +1410,7 @@ __m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m256(_mm512_mask_extractf32x8_ps(((__m256)(__v8sf){0,0,0,0,0,0,0,0}), (__mmask8)0xFF,((__m512)(__v16sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f}),1),8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f));
__m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_maskz_extractf32x8_ps
@@ -1416,12 +1418,14 @@ __m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_extractf32x8_ps(__U, __A, 1);
}
+TEST_CONSTEXPR(match_m256(_mm512_maskz_extractf32x8_ps((__mmask8)0x0F, ((__m512)(__v16sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f}),1),8.0f, 9.0f, 10.0f, 11.0f, 0.0f, 0.0f, 0.0f, 0.0f));
__m128d test_mm512_extractf64x2_pd(__m512d __A) {
// CHECK-LABEL: test_mm512_extractf64x2_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7>
return _mm512_extractf64x2_pd(__A, 3);
}
+TEST_CONSTEXPR(match_m128d(_mm512_extractf64x2_pd(((__m512d){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0}), 3),6.0, 7.0));
__m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_mask_extractf64x2_pd
@@ -1429,6 +1433,7 @@ __m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A)
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3);
}
+TEST_CONSTEXPR(match_m128d(_mm512_mask_extractf64x2_pd(((__m128d)(__v2df){100.0, 101.0}),(__mmask8)0x1,((__m512d)(__v8df){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0}),3),6.0, 101.0));
__m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_maskz_extractf64x2_pd
@@ -1436,12 +1441,14 @@ __m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_maskz_extractf64x2_pd(__U, __A, 3);
}
+TEST_CONSTEXPR(match_m128d(_mm512_maskz_extractf64x2_pd((__mmask8)0x2,((__m512d){0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0}),3),0.0, 7.0));
__m256i test_mm512_extracti32x8_epi32(__m512i __A) {
// CHECK-LABEL: test_mm512_extracti32x8_epi32
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extracti32x8_epi32(__A, 1);
}
+TEST_CONSTEXPR(match_v8si(_mm512_extracti32x8_epi32(((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 1),8, 9,10,11,12,13,14,15));
__m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_extracti32x8_epi32
@@ -1449,6 +1456,7 @@ __m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_v8si(_mm512_mask_extracti32x8_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), (__mmask8)0xAA,((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),100, 9, 102, 11, 104, 13, 106, 15));
__m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_extracti32x8_epi32
@@ -1456,12 +1464,14 @@ __m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_maskz_extracti32x8_epi32(__U, __A, 1);
}
+TEST_CONSTEXPR(match_v8si(_mm512_maskz_extracti32x8_epi32((__mmask8)0x0F,((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),8, 9, 10, 11, 0, 0, 0, 0));
__m128i test_mm512_extracti64x2_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_extracti64x2_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
return _mm512_extracti64x2_epi64(__A, 3);
}
+TEST_CONSTEXPR(match_m128i(_mm512_extracti64x2_epi64(((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL}), 3),6ULL, 7ULL));
__m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_extracti64x2_epi64
@@ -1469,6 +1479,7 @@ __m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3);
}
+TEST_CONSTEXPR(match_m128i(_mm512_mask_extracti64x2_epi64(((__m128i)(__v2di){100ULL, 101ULL}), (__mmask8)0x1,((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL}),3),6ULL, 101ULL));
__m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_extracti64x2_epi64
@@ -1476,6 +1487,7 @@ __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_maskz_extracti64x2_epi64(__U, __A, 3);
}
+TEST_CONSTEXPR(match_m128i(_mm512_maskz_extracti64x2_epi64((__mmask8)0x2,((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL}),3),0ULL, 7ULL));
__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
// CHECK-LABEL: test_mm512_insertf32x8
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 7756f0da18c03..0f572f6967812 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -2452,6 +2452,7 @@ __m256d test_mm512_extractf64x4_pd(__m512d a)
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extractf64x4_pd(a, 1);
}
+TEST_CONSTEXPR(match_m256d(_mm512_extractf64x4_pd(((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}),1),4.0, 5.0, 6.0, 7.0));
__m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){
// CHECK-LABEL: test_mm512_mask_extractf64x4_pd
@@ -2459,6 +2460,7 @@ __m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m256d(_mm512_mask_extractf64x4_pd(((__m256d){100.0,101.0,102.0,103.0}), (__mmask8)0x5,((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}), 1), 4.0, 101.0, 6.0, 103.0));
__m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){
// CHECK-LABEL: test_mm512_maskz_extractf64x4_pd
@@ -2466,6 +2468,7 @@ __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
}
+TEST_CONSTEXPR(match_m256d(_mm512_maskz_extractf64x4_pd((__mmask8)0x3,((__m512d){0.0,1.0,2.0,3.0, 4.0,5.0,6.0,7.0}),1),4.0, 5.0, 0.0, 0.0));
__m128 test_mm512_extractf32x4_ps(__m512 a)
{
@@ -2473,6 +2476,7 @@ __m128 test_mm512_extractf32x4_ps(__m512 a)
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extractf32x4_ps(a, 1);
}
+TEST_CONSTEXPR(match_m128(_mm512_extractf32x4_ps(((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),4.0f, 5.0f, 6.0f, 7.0f));
__m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){
// CHECK-LABEL: test_mm512_mask_extractf32x4_ps
@@ -2480,6 +2484,7 @@ __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m128(_mm512_mask_extractf32x4_ps(((__m128){100,101,102,103}),(__mmask8)0x5,((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),4.0f, 101.0f, 6.0f, 103.0f));
__m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512 __A){
// CHECK-LABEL: test_mm512_maskz_extractf32x4_ps
@@ -2487,6 +2492,7 @@ __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512 __A){
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm512_maskz_extractf32x4_ps(__U, __A, 1);
}
+TEST_CONSTEXPR(match_m128(_mm512_maskz_extractf32x4_ps((__mmask8)0x3,((__m512){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}),1),4.0f, 5.0f, 0.0f, 0.0f));
__mmask16 test_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_cmpeq_epu32_mask
@@ -7357,6 +7363,7 @@ __m128i test_mm512_extracti32x4_epi32(__m512i __A) {
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
return _mm512_extracti32x4_epi32(__A, 3);
}
+TEST_CONSTEXPR(match_m128i(_mm512_extracti32x4_epi32(((__m512i)(__v16si){0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 3), 0x0000000D0000000CULL, 0x0000000F0000000EULL));
__m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_extracti32x4_epi32
@@ -7364,6 +7371,7 @@ __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3);
}
+TEST_CONSTEXPR(match_m128i(_mm512_mask_extracti32x4_epi32(((__m128i)(__v4si){100,101,102,103}), (__mmask8)0x5, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 3), 0x000000650000000CULL, 0x000000670000000EULL));
__m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_extracti32x4_epi32
@@ -7371,12 +7379,14 @@ __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm512_maskz_extracti32x4_epi32(__U, __A, 3);
}
+TEST_CONSTEXPR(match_m128i(_mm512_maskz_extracti32x4_epi32((__mmask8)0x3, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 3), 0x0000000D0000000CULL, 0x0000000000000000ULL));
__m256i test_mm512_extracti64x4_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_extracti64x4_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extracti64x4_epi64(__A, 1);
}
+TEST_CONSTEXPR(match_m256i(_mm512_extracti64x4_epi64(((__m512i)(__v8di){0,1,2,3,4,5,6,7}), 1), 4ULL, 5ULL, 6ULL, 7ULL));
__m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_extracti64x4_epi64
@@ -7384,6 +7394,7 @@ __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m256i(_mm512_mask_extracti64x4_epi64(((__m256i)(__v4di){100ULL,101ULL,102ULL,103ULL}), (__mmask8)0x5, (((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL})), 1), 4ULL, 101ULL, 6ULL, 103ULL));
__m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_extracti64x4_epi64
@@ -7391,7 +7402,7 @@ __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm512_maskz_extracti64x4_epi64(__U, __A, 1);
}
-
+TEST_CONSTEXPR(match_m256i(_mm512_maskz_extracti64x4_epi64((__mmask8)0x3, (((__m512i)(__v8di){0ULL,1ULL,2ULL,3ULL, 4ULL,5ULL,6ULL,7ULL})), 1), 4ULL, 5ULL, 0ULL, 0ULL));
__m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) {
// CHECK-LABEL: test_mm512_insertf64x4
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 51385d57d2944..09687f3419732 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -9875,6 +9875,7 @@ __m128 test_mm256_extractf32x4_ps(__m256 __A) {
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf32x4_ps(__A, 1);
}
+TEST_CONSTEXPR(match_m128(_mm256_extractf32x4_ps(((__m256){0,1,2,3, 4,5,6,7}), 1),4.0f, 5.0f, 6.0f, 7.0f));
__m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) {
// CHECK-LABEL: test_mm256_mask_extractf32x4_ps
@@ -9882,6 +9883,7 @@ __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m128(_mm256_mask_extractf32x4_ps((((__m128){100,101,102,103})), (__mmask8)0x5, (((__m256){0,1,2,3, 4,5,6,7})), 1), 4.0f, 101.0f, 6.0f, 103.0f));
__m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) {
// CHECK-LABEL: test_mm256_maskz_extractf32x4_ps
@@ -9889,12 +9891,14 @@ __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_maskz_extractf32x4_ps(__U, __A, 1);
}
+TEST_CONSTEXPR(match_m128(_mm256_maskz_extractf32x4_ps((__mmask8)0x3, (((__m256){0,1,2,3, 4,5,6,7})), 1), 4.0f, 5.0f, 0.0f, 0.0f));
__m128i test_mm256_extracti32x4_epi32(__m256i __A) {
// CHECK-LABEL: test_mm256_extracti32x4_epi32
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extracti32x4_epi32(__A, 1);
}
+TEST_CONSTEXPR(match_m128i(_mm256_extracti32x4_epi32((((__m256i)(__v8si){0,1,2,3, 4,5,6,7})), 1), 0x0000000500000004ULL, 0x0000000700000006ULL));
__m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_extracti32x4_epi32
@@ -9902,6 +9906,7 @@ __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m128i(_mm256_mask_extracti32x4_epi32((((__m128i)(__v4si){100,101,102,103})), (__mmask8)0xA, (((__m256i)(__v8si){0,1,2,3, 4,5,6,7})), 1),0x0000000500000064ULL, 0x0000000700000066ULL));
__m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_extracti32x4_epi32
@@ -9909,6 +9914,7 @@ __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm256_maskz_extracti32x4_epi32(__U, __A, 1);
}
+TEST_CONSTEXPR(match_m128i(_mm256_maskz_extracti32x4_epi32((__mmask8)0x3, (((__m256i)(__v8si){0,1,2,3, 4,5,6,7})), 1), 0x0000000500000004ULL, 0x0000000000000000ULL));
__m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) {
// CHECK-LABEL: test_mm256_insertf32x4
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 938845799acf5..9b2822a07153c 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -1083,6 +1083,7 @@ __m128d test_mm256_extractf64x2_pd(__m256d __A) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
return _mm256_extractf64x2_pd(__A, 1);
}
+TEST_CONSTEXPR(match_m128d(_mm256_extractf64x2_pd(((__m256d){0.0,1.0,2.0,3.0}), 1), 2.0, 3.0));
__m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) {
// CHECK-LABEL: test_mm256_mask_extractf64x2_pd
@@ -1090,6 +1091,7 @@ __m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A)
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m128d(_mm256_mask_extractf64x2_pd((((__m128d){100.0, 101.0})), (__mmask8)0x1, (((__m256d){0.0,1.0,2.0,3.0})),1), 2.0, 101.0));
__m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
// CHECK-LABEL: test_mm256_maskz_extractf64x2_pd
@@ -1097,12 +1099,14 @@ __m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm256_maskz_extractf64x2_pd(__U, __A, 1);
}
+TEST_CONSTEXPR(match_m128d(_mm256_maskz_extractf64x2_pd((__mmask8)0x2,(((__m256d){0.0,1.0,2.0,3.0})),1), 0.0, 3.0));
__m128i test_mm256_extracti64x2_epi64(__m256i __A) {
// CHECK-LABEL: test_mm256_extracti64x2_epi64
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
return _mm256_extracti64x2_epi64(__A, 1);
}
+TEST_CONSTEXPR(match_m128i(_mm256_extracti64x2_epi64(((__m256i){0ULL,1ULL,2ULL,3ULL}), 1), 2ULL, 3ULL));
__m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_extracti64x2_epi64
@@ -1110,6 +1114,7 @@ __m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_m128i(_mm256_mask_extracti64x2_epi64((((__m128i){100ULL, 101ULL})), (__mmask8)0x1, (((__m256i){0ULL,1ULL,2ULL,3ULL})), 1), 2ULL, 101ULL));
__m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_extracti64x2_epi64
@@ -1117,6 +1122,7 @@ __m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm256_maskz_extracti64x2_epi64(__U, __A, 1);
}
+TEST_CONSTEXPR(match_m128i(_mm256_maskz_extracti64x2_epi64((__mmask8)0x2, (((__m256i){0ULL,1ULL,2ULL,3ULL})),1), 0ULL, 3ULL));
__m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) {
// CHECK-LABEL: test_mm256_insertf64x2
>From 454431b4f38b6eae0592cfd31a9fc0590e2dd5d0 Mon Sep 17 00:00:00 2001
From: SeongjaeP <psjj960507 at gmail.com>
Date: Wed, 15 Oct 2025 03:22:45 +0900
Subject: [PATCH 07/11] Move relevant AVX builtins to a Constexpr attribute
block
---
clang/include/clang/Basic/BuiltinsX86.td | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 1dc44314494b4..b967a888463de 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -479,7 +479,7 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
@@ -489,9 +489,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
- def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">;
- def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">;
- def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">;
def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
@@ -512,6 +509,9 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
+ def vextractf128_pd256 : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int)">;
+ def vextractf128_ps256 : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int)">;
+ def vextractf128_si256 : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int)">;
def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
@@ -589,7 +589,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
-let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -620,7 +620,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
- def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">;
}
@@ -677,6 +676,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">;
}
let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
>From 26cb15c4af2d53d0c4b1764fd5675064caf85879 Mon Sep 17 00:00:00 2001
From: SeongjaeP <psjj960507 at gmail.com>
Date: Wed, 15 Oct 2025 03:33:21 +0900
Subject: [PATCH 08/11] Refactor and remove unnecessary code
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 9 +--------
clang/lib/AST/ExprConstant.cpp | 20 ++++++--------------
2 files changed, 7 insertions(+), 22 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b64586364b680..53a2f46f6d4b8 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2877,9 +2877,6 @@ static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
unsigned SrcElems = Src.getNumElems();
unsigned DstElems = Dst.getNumElems();
- if (SrcElems == 0 || DstElems == 0 || (SrcElems % DstElems) != 0)
- return false;
-
unsigned NumLanes = SrcElems / DstElems;
unsigned Lane = static_cast<unsigned>(Index % NumLanes);
unsigned ExtractPos = Lane * DstElems;
@@ -2917,8 +2914,6 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
unsigned SrcElems = Src.getNumElems();
unsigned DstElems = Dst.getNumElems();
- if (!SrcElems || !DstElems || (SrcElems % DstElems) != 0)
- return false;
PrimType ElemT = Src.getFieldDesc()->getPrimType();
if (ElemT != Dst.getFieldDesc()->getPrimType() ||
@@ -2929,11 +2924,9 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
unsigned Base = Lane * DstElems;
- uint64_t Mask = MaskAPS.getZExtValue();
-
TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != DstElems; ++I) {
- if ((Mask >> I) & 1)
+ if (MaskAPS[I])
Dst.elem<T>(I) = Src.elem<T>(Base + I);
else
Dst.elem<T>(I) = Merge.elem<T>(I);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9799ddfb4d369..8762c375a280a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11787,13 +11787,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
const auto *RetVT = E->getType()->castAs<VectorType>();
- if (!RetVT) return false;
-
unsigned RetLen = RetVT->getNumElements();
unsigned SrcLen = SourceVec.getVectorLength();
- if (SrcLen != RetLen * 2)
- return false;
-
unsigned Idx = SourceImm.getInt().getZExtValue() & 1;
SmallVector<APValue, 32> ResultElements;
@@ -11820,28 +11815,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue SourceVec, MergeVec;
APSInt Imm, MaskImm;
- if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
- !EvaluateInteger(E->getArg(1), Imm, Info) ||
- !EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
- !EvaluateInteger(E->getArg(3), MaskImm, Info))
- return false;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateInteger(E->getArg(1), Imm, Info) ||
+ !EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
+ !EvaluateInteger(E->getArg(3), MaskImm, Info))
+ return false;
const auto *RetVT = E->getType()->castAs<VectorType>();
unsigned RetLen = RetVT->getNumElements();
if (!SourceVec.isVector() || !MergeVec.isVector()) return false;
unsigned SrcLen = SourceVec.getVectorLength();
- if (!SrcLen || !RetLen || (SrcLen % RetLen) != 0) return false;
-
unsigned Lanes = SrcLen / RetLen;
unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes);
unsigned Base = Lane * RetLen;
- uint64_t Mask = MaskImm.getZExtValue();
SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);
for (unsigned I = 0; I < RetLen; ++I) {
- if ((Mask >> I) & 1)
+ if (MaskImm[I])
ResultElements.push_back(SourceVec.getVectorElt(Base + I));
else
ResultElements.push_back(MergeVec.getVectorElt(I));
>From aa54ae510744d2e931e79442061421c1e862a915 Mon Sep 17 00:00:00 2001
From: SeongjaeP <psjj960507 at gmail.com>
Date: Thu, 16 Oct 2025 11:21:55 +0900
Subject: [PATCH 09/11] Remove unnecessary code and move code for better
structure
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 53a2f46f6d4b8..2a1558c29dab5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2882,8 +2882,6 @@ static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
unsigned ExtractPos = Lane * DstElems;
PrimType ElemT = Src.getFieldDesc()->getPrimType();
- if (ElemT != Dst.getFieldDesc()->getPrimType())
- return false;
TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != DstElems; ++I) {
@@ -2915,15 +2913,12 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
unsigned SrcElems = Src.getNumElems();
unsigned DstElems = Dst.getNumElems();
- PrimType ElemT = Src.getFieldDesc()->getPrimType();
- if (ElemT != Dst.getFieldDesc()->getPrimType() ||
- ElemT != Merge.getFieldDesc()->getPrimType())
- return false;
-
unsigned NumLanes = SrcElems / DstElems;
unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
unsigned Base = Lane * DstElems;
+ PrimType ElemT = Src.getFieldDesc()->getPrimType();
+
TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != DstElems; ++I) {
if (MaskAPS[I])
>From 656c74fdd66357198e031c1dd1b3115032b3ce22 Mon Sep 17 00:00:00 2001
From: SeongjaeP <psjj960507 at gmail.com>
Date: Thu, 16 Oct 2025 14:25:05 +0900
Subject: [PATCH 10/11] Refactor: apply clang-format to recent changes
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 20 +++++++-------
clang/lib/AST/ExprConstant.cpp | 33 ++++++++++++-----------
clang/lib/Headers/avx512dqintrin.h | 34 +++++++++++-------------
clang/lib/Headers/avx512fintrin.h | 30 ++++++++++-----------
clang/lib/Headers/avx512vldqintrin.h | 18 ++++++-------
clang/lib/Headers/avx512vlintrin.h | 18 ++++++-------
6 files changed, 75 insertions(+), 78 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2a1558c29dab5..8df9a62ae42fc 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2859,8 +2859,8 @@ static bool interp__builtin_elementwise_triop(
}
static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
- const CallExpr *Call,
- unsigned ID) {
+ const CallExpr *Call,
+ unsigned ID) {
assert(Call->getNumArgs() == 2);
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
@@ -2893,7 +2893,8 @@ static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr OpPC,
+static bool interp__builtin_x86_extract_vector_masked(InterpState &S,
+ CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
assert(Call->getNumArgs() == 4);
@@ -2903,7 +2904,8 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
const Pointer &Src = S.Stk.pop<Pointer>();
- if (!Src.getFieldDesc()->isPrimitiveArray() || !Merge.getFieldDesc()->isPrimitiveArray())
+ if (!Src.getFieldDesc()->isPrimitiveArray() ||
+ !Merge.getFieldDesc()->isPrimitiveArray())
return false;
const Pointer &Dst = S.Stk.peek<Pointer>();
@@ -2924,7 +2926,7 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
if (MaskAPS[I])
Dst.elem<T>(I) = Src.elem<T>(Base + I);
else
- Dst.elem<T>(I) = Merge.elem<T>(I);
+ Dst.elem<T>(I) = Merge.elem<T>(I);
}
});
@@ -3564,10 +3566,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
- case X86::BI__builtin_ia32_extract128i256:
- case X86::BI__builtin_ia32_vextractf128_pd256:
- case X86::BI__builtin_ia32_vextractf128_ps256:
- case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID);
case X86::BI__builtin_ia32_extractf32x4_256_mask:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 8762c375a280a..6dafa2964aaa0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11773,8 +11773,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
-
- case X86::BI__builtin_ia32_extract128i256:
+
+ case X86::BI__builtin_ia32_extract128i256:
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256: {
@@ -11782,7 +11782,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceImm))
return false;
-
+
if (!SourceVec.isVector())
return false;
@@ -11790,28 +11790,28 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned RetLen = RetVT->getNumElements();
unsigned SrcLen = SourceVec.getVectorLength();
unsigned Idx = SourceImm.getInt().getZExtValue() & 1;
-
+
SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);
for (unsigned I = 0; I < RetLen; I++)
ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I));
-
+
return Success(APValue(ResultElements.data(), RetLen), E);
}
- case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
case X86::BI__builtin_ia32_extractf32x4_256_mask:
- case X86::BI__builtin_ia32_extracti32x4_mask:
- case X86::BI__builtin_ia32_extractf32x4_mask:
- case X86::BI__builtin_ia32_extracti32x8_mask:
- case X86::BI__builtin_ia32_extractf32x8_mask:
- case X86::BI__builtin_ia32_extracti64x2_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_256_mask:
- case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x4_mask:
- case X86::BI__builtin_ia32_extractf64x4_mask:{
+ case X86::BI__builtin_ia32_extractf64x4_mask: {
APValue SourceVec, MergeVec;
APSInt Imm, MaskImm;
@@ -11824,7 +11824,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
const auto *RetVT = E->getType()->castAs<VectorType>();
unsigned RetLen = RetVT->getNumElements();
- if (!SourceVec.isVector() || !MergeVec.isVector()) return false;
+ if (!SourceVec.isVector() || !MergeVec.isVector())
+ return false;
unsigned SrcLen = SourceVec.getVectorLength();
unsigned Lanes = SrcLen / RetLen;
unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes);
@@ -11836,7 +11837,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (MaskImm[I])
ResultElements.push_back(SourceVec.getVectorElt(Base + I));
else
- ResultElements.push_back(MergeVec.getVectorElt(I));
+ ResultElements.push_back(MergeVec.getVectorElt(I));
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 953285d6ab414..2615cd95c8c07 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1212,10 +1212,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8di)_mm512_setzero_si512());
}
-#define _mm512_extractf32x8_ps(A, imm) \
- ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm_setzero_pd(), \
- (__mmask8)-1))
+#define _mm512_extractf32x8_ps(A, imm) \
+ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm_setzero_pd(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
@@ -1227,11 +1227,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U)))
-#define _mm512_extractf64x2_pd(A, imm) \
- ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1))
+#define _mm512_extractf64x2_pd(A, imm) \
+ ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
+ (__v8df)(__m512d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
@@ -1245,10 +1244,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U)))
-#define _mm512_extracti32x8_epi32(A, imm) \
- ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1))
+#define _mm512_extracti32x8_epi32(A, imm) \
+ ((__m256i)__builtin_ia32_extracti32x8_mask( \
+ (__v16si)(__m512i)(A), (int)(imm), (__v8si)_mm256_setzero_si256(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
@@ -1260,11 +1259,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8si)_mm256_setzero_si256(), \
(__mmask8)(U)))
-#define _mm512_extracti64x2_epi64(A, imm) \
- ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm512_extracti64x2_epi64(A, imm) \
+ ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
+ (__v8di)(__m512i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 2768a5bae887d..13370aaef9316 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3164,10 +3164,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
(__v16si)_mm512_setzero_si512()))
/* Vector Extract */
-#define _mm512_extractf64x4_pd(A, I) \
- ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1))
+#define _mm512_extractf64x4_pd(A, I) \
+ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
@@ -3179,10 +3179,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
(__v4df)_mm256_setzero_pd(), \
(__mmask8)(U)))
-#define _mm512_extractf32x4_ps(A, I) \
- ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1))
+#define _mm512_extractf32x4_ps(A, I) \
+ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
@@ -7105,10 +7105,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
__builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
-#define _mm512_extracti32x4_epi32(A, imm) \
- ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm512_extracti32x4_epi32(A, imm) \
+ ((__m128i)__builtin_ia32_extracti32x4_mask( \
+ (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
@@ -7120,10 +7120,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
(__v4si)_mm_setzero_si128(), \
(__mmask8)(U)))
-#define _mm512_extracti64x4_epi64(A, imm) \
+#define _mm512_extracti64x4_epi64(A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)-1))
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 2d3c4b551e3b0..8aded1c4774eb 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -1072,11 +1072,10 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
(__v4di)_mm256_setzero_si256());
}
-#define _mm256_extractf64x2_pd(A, imm) \
- ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1))
+#define _mm256_extractf64x2_pd(A, imm) \
+ ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
+ (__v4df)(__m256d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
+ (__mmask8) - 1))
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
@@ -1090,11 +1089,10 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U)))
-#define _mm256_extracti64x2_epi64(A, imm) \
- ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm256_extracti64x2_epi64(A, imm) \
+ ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
+ (__v4di)(__m256i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 252fb111988b0..eefeb1dad89eb 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7606,11 +7606,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
__builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
}
-#define _mm256_extractf32x4_ps(A, imm) \
- ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1))
+#define _mm256_extractf32x4_ps(A, imm) \
+ ((__m128)__builtin_ia32_extractf32x4_256_mask( \
+ (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
+ (__mmask8) - 1))
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
@@ -7624,11 +7623,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U)))
-#define _mm256_extracti32x4_epi32(A, imm) \
- ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm256_extracti32x4_epi32(A, imm) \
+ ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
+ (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
>From 9bb4796c470d968b20bbfa679e4c6e9bc61a0618 Mon Sep 17 00:00:00 2001
From: SeongjaeP <psjj960507 at gmail.com>
Date: Fri, 17 Oct 2025 11:39:06 +0900
Subject: [PATCH 11/11] Remove unused code
---
clang/lib/AST/ExprConstant.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 6dafa2964aaa0..7da4747999ee1 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11788,7 +11788,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
const auto *RetVT = E->getType()->castAs<VectorType>();
unsigned RetLen = RetVT->getNumElements();
- unsigned SrcLen = SourceVec.getVectorLength();
unsigned Idx = SourceImm.getInt().getZExtValue() & 1;
SmallVector<APValue, 32> ResultElements;
More information about the cfe-commits
mailing list