[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX/AVX512 subvector extraction intrinsics to be used in constexpr #157712 (PR #162836)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 10 05:15:32 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: SeongJaePark (SeongjaeP)
<details>
<summary>Changes</summary>
**This PR supersedes and replaces PR #<!-- -->158853**
The original branch diverged too far from the main branch, resulting in significant merge conflicts that were difficult to resolve cleanly. To provide a clean and reviewable history, this new PR was created by cherry-picking the necessary commits onto a fresh branch based on the latest `main`.
---
*(Original Description)*
This patch enables the use of AVX/AVX512 subvector extraction intrinsics within `constexpr` functions. This is achieved by implementing the evaluation logic for these intrinsics in `VectorExprEvaluator::VisitCallExpr` and `InterpretBuiltin`.
The original discussion and review comments can be found in the previous pull request for context: #<!-- -->158853
---
Patch is 40.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162836.diff
13 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.td (+7-7)
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+105)
- (modified) clang/lib/AST/ExprConstant.cpp (+75)
- (modified) clang/lib/Headers/avx512dqintrin.h (+4-4)
- (modified) clang/lib/Headers/avx512fintrin.h (+4-4)
- (modified) clang/lib/Headers/avx512vldqintrin.h (+2-2)
- (modified) clang/lib/Headers/avx512vlintrin.h (+2-2)
- (modified) clang/test/CodeGen/X86/avx-builtins.c (+7-1)
- (modified) clang/test/CodeGen/X86/avx2-builtins.c (+1)
- (modified) clang/test/CodeGen/X86/avx512dq-builtins.c (+12)
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+12-1)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+6)
- (modified) clang/test/CodeGen/X86/avx512vldq-builtins.c (+6)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 217589d7add1d..b3b4d5e076fd8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -471,7 +471,7 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
@@ -576,7 +576,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
-let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -1065,7 +1065,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf64x4_mask : X86Builtin<"_Vector<4, double>(_Vector<8, double>, _Constant int, _Vector<4, double>, unsigned char)">;
def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">;
}
@@ -2944,24 +2944,24 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def pmovqw256mem_mask : X86Builtin<"void(_Vector<8, short *>, _Vector<4, long long int>, unsigned char)">;
}
-let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extractf32x8_mask : X86Builtin<"_Vector<8, float>(_Vector<16, float>, _Constant int, _Vector<8, float>, unsigned char)">;
def extractf64x2_512_mask : X86Builtin<"_Vector<2, double>(_Vector<8, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti32x8_mask : X86Builtin<"_Vector<8, int>(_Vector<16, int>, _Constant int, _Vector<8, int>, unsigned char)">;
def extracti64x2_512_mask : X86Builtin<"_Vector<2, long long int>(_Vector<8, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def extracti32x4_mask : X86Builtin<"_Vector<4, int>(_Vector<16, int>, _Constant int, _Vector<4, int>, unsigned char)">;
def extracti64x4_mask : X86Builtin<"_Vector<4, long long int>(_Vector<8, long long int>, _Constant int, _Vector<4, long long int>, unsigned char)">;
}
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf64x2_256_mask : X86Builtin<"_Vector<2, double>(_Vector<4, double>, _Constant int, _Vector<2, double>, unsigned char)">;
def extracti64x2_256_mask : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int, _Vector<2, long long int>, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def extractf32x4_256_mask : X86Builtin<"_Vector<4, float>(_Vector<8, float>, _Constant int, _Vector<4, float>, unsigned char)">;
def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 922d67940e22f..d844f93d1b983 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2819,6 +2819,92 @@ static bool interp__builtin_elementwise_triop(
return true;
}
+static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call,
+ unsigned ID) {
+ assert(Call->getNumArgs() == 2);
+
+ APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+ uint64_t Index = ImmAPS.getZExtValue();
+
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ if (!Src.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ if (!Dst.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ unsigned SrcElems = Src.getNumElems();
+ unsigned DstElems = Dst.getNumElems();
+
+ if (SrcElems == 0 || DstElems == 0 || (SrcElems % DstElems) != 0)
+ return false;
+
+ unsigned NumLanes = SrcElems / DstElems;
+ unsigned Lane = static_cast<unsigned>(Index % NumLanes);
+ unsigned ExtractPos = Lane * DstElems;
+
+ PrimType ElemT = Src.getFieldDesc()->getPrimType();
+ if (ElemT != Dst.getFieldDesc()->getPrimType())
+ return false;
+
+ TYPE_SWITCH(ElemT, {
+ for (unsigned I = 0; I != DstElems; ++I) {
+ Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
+ }
+ });
+
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call,
+ unsigned ID) {
+ assert(Call->getNumArgs() == 4);
+
+ APSInt MaskAPS = popToAPSInt(S, Call->getArg(3));
+ const Pointer &Merge = S.Stk.pop<Pointer>();
+ APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+
+ if (!Src.getFieldDesc()->isPrimitiveArray() || !Merge.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ if (!Dst.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ unsigned SrcElems = Src.getNumElems();
+ unsigned DstElems = Dst.getNumElems();
+ if (!SrcElems || !DstElems || (SrcElems % DstElems) != 0)
+ return false;
+
+ PrimType ElemT = Src.getFieldDesc()->getPrimType();
+ if (ElemT != Dst.getFieldDesc()->getPrimType() ||
+ ElemT != Merge.getFieldDesc()->getPrimType())
+ return false;
+
+ unsigned NumLanes = SrcElems / DstElems;
+ unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
+ unsigned Base = Lane * DstElems;
+
+ uint64_t Mask = MaskAPS.getZExtValue();
+
+ TYPE_SWITCH(ElemT, {
+ for (unsigned I = 0; I != DstElems; ++I) {
+ if ((Mask >> I) & 1)
+ Dst.elem<T>(I) = Src.elem<T>(Base + I);
+ else
+ Dst.elem<T>(I) = Merge.elem<T>(I);
+ }
+ });
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
@@ -3451,6 +3537,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID);
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 35a866ea5010f..5ac7837aa0fe3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11769,6 +11769,81 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt &RHS) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256: {
+ APValue SourceVec, SourceImm;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceImm))
+ return false;
+
+ if (!SourceVec.isVector())
+ return false;
+
+ const auto *RetVT = E->getType()->castAs<VectorType>();
+ if (!RetVT) return false;
+
+ unsigned RetLen = RetVT->getNumElements();
+ unsigned SrcLen = SourceVec.getVectorLength();
+ if (SrcLen != RetLen * 2)
+ return false;
+
+ unsigned Idx = SourceImm.getInt().getZExtValue() & 1;
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(RetLen);
+
+ for (unsigned I = 0; I < RetLen; I++)
+ ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I));
+
+ return Success(APValue(ResultElements.data(), RetLen), E);
+ }
+
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extractf64x4_mask:{
+ APValue SourceVec, MergeVec;
+ APSInt Imm, MaskImm;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateInteger(E->getArg(1), Imm, Info) ||
+ !EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
+ !EvaluateInteger(E->getArg(3), MaskImm, Info))
+ return false;
+
+ const auto *RetVT = E->getType()->castAs<VectorType>();
+ unsigned RetLen = RetVT->getNumElements();
+
+ if (!SourceVec.isVector() || !MergeVec.isVector()) return false;
+ unsigned SrcLen = SourceVec.getVectorLength();
+ if (!SrcLen || !RetLen || (SrcLen % RetLen) != 0) return false;
+
+ unsigned Lanes = SrcLen / RetLen;
+ unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes);
+ unsigned Base = Lane * RetLen;
+ uint64_t Mask = MaskImm.getZExtValue();
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(RetLen);
+ for (unsigned I = 0; I < RetLen; ++I) {
+ if ((Mask >> I) & 1)
+ ResultElements.push_back(SourceVec.getVectorElt(Base + I));
+ else
+ ResultElements.push_back(MergeVec.getVectorElt(I));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index fb65bf933b8ad..953285d6ab414 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1214,7 +1214,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extractf32x8_ps(A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm256_undefined_ps(), \
+ (__v8sf)_mm_setzero_pd(), \
(__mmask8)-1))
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
@@ -1230,7 +1230,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
(int)(imm), \
- (__v2df)_mm_undefined_pd(), \
+ (__v2df)_mm_setzero_pd(), \
(__mmask8)-1))
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
@@ -1247,7 +1247,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extracti32x8_epi32(A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_undefined_si256(), \
+ (__v8si)_mm256_setzero_si256(), \
(__mmask8)-1))
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
@@ -1263,7 +1263,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#define _mm512_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
(int)(imm), \
- (__v2di)_mm_undefined_si128(), \
+ (__v2di)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 80e58425cdd71..2768a5bae887d 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3166,7 +3166,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
#define _mm512_extractf64x4_pd(A, I) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
- (__v4df)_mm256_undefined_pd(), \
+ (__v4df)_mm256_setzero_pd(), \
(__mmask8)-1))
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
@@ -3181,7 +3181,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
#define _mm512_extractf32x4_ps(A, I) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v4sf)_mm_undefined_ps(), \
+ (__v4sf)_mm_setzero_ps(), \
(__mmask8)-1))
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
@@ -7107,7 +7107,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
#define _mm512_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_undefined_si128(), \
+ (__v4si)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
@@ -7122,7 +7122,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
#define _mm512_extracti64x4_epi64(A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_undefined_si256(), \
+ (__v4di)_mm256_setzero_si256(), \
(__mmask8)-1))
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 68bd52e43981a..2d3c4b551e3b0 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -1075,7 +1075,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
#define _mm256_extractf64x2_pd(A, imm) \
((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
(int)(imm), \
- (__v2df)_mm_undefined_pd(), \
+ (__v2df)_mm_setzero_pd(), \
(__mmask8)-1))
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
@@ -1093,7 +1093,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
#define _mm256_extracti64x2_epi64(A, imm) \
((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
(int)(imm), \
- (__v2di)_mm_undefined_si128(), \
+ (__v2di)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 965741f0ff944..252fb111988b0 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7609,7 +7609,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
#define _mm256_extractf32x4_ps(A, imm) \
((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
(int)(imm), \
- (__v4sf)_mm_undefined_ps(), \
+ (__v4sf)_mm_setzero_ps(), \
(__mmask8)-1))
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
@@ -7627,7 +7627,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
#define _mm256_extracti32x4_epi32(A, imm) \
((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
(int)(imm), \
- (__v4si)_mm_undefined_si128(), \
+ (__v4si)_mm_setzero_si128(), \
(__mmask8)-1))
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 5f08b6be81ab7..11ed8498b8ecd 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1070,19 +1070,25 @@ __m128d test_mm256_extractf128_pd(__m256d A) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
return _mm256_extractf128_pd(A, 1);
}
+TEST_CONSTEXPR(match_m128d(_mm256_extractf128_pd(((__m256d){0.0, 1.0, 2.0, 3.0}), 1),
+ 2.0, 3.0));
__m128 test_mm256_extractf128_ps(__m256 A) {
// CHECK-LABEL: test_mm256_extractf128_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/162836
More information about the cfe-commits
mailing list