[clang] [llvm] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX/AVX512 subvector extraction intrinsics to be used in constexpr #157712 (PR #158853)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 9 01:40:17 PDT 2025
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions cpp,c,h -- clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ExprConstant.cpp clang/lib/Headers/avx512dqintrin.h clang/lib/Headers/avx512fintrin.h clang/lib/Headers/avx512vldqintrin.h clang/lib/Headers/avx512vlintrin.h clang/test/CodeGen/X86/avx-builtins.c clang/test/CodeGen/X86/avx2-builtins.c clang/test/CodeGen/X86/avx512dq-builtins.c clang/test/CodeGen/X86/avx512f-builtins.c clang/test/CodeGen/X86/avx512vl-builtins.c clang/test/CodeGen/X86/avx512vldq-builtins.c
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ff51166f6..0fa7fa3da 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -736,7 +736,6 @@ static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
return true;
}
-
/// rotateleft(value, amount)
static bool interp__builtin_rotate(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
@@ -2839,8 +2838,8 @@ static bool interp__builtin_elementwise_triop(
//_builtin_extract
static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
- const CallExpr *Call,
- unsigned ID) {
+ const CallExpr *Call,
+ unsigned ID) {
assert(Call->getNumArgs() == 2);
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
@@ -2878,7 +2877,8 @@ static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr OpPC,
+static bool interp__builtin_x86_extract_vector_masked(InterpState &S,
+ CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
assert(Call->getNumArgs() == 4);
@@ -2888,7 +2888,8 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
const Pointer &Src = S.Stk.pop<Pointer>();
- if (!Src.getFieldDesc()->isPrimitiveArray() || !Merge.getFieldDesc()->isPrimitiveArray())
+ if (!Src.getFieldDesc()->isPrimitiveArray() ||
+ !Merge.getFieldDesc()->isPrimitiveArray())
return false;
const Pointer &Dst = S.Stk.peek<Pointer>();
@@ -2916,7 +2917,7 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
if ((Mask >> I) & 1)
Dst.elem<T>(I) = Src.elem<T>(Base + I);
else
- Dst.elem<T>(I) = Merge.elem<T>(I);
+ Dst.elem<T>(I) = Merge.elem<T>(I);
}
});
@@ -2924,7 +2925,6 @@ static bool interp__builtin_x86_extract_vector_masked(InterpState &S, CodePtr Op
return true;
}
-
static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned ID) {
@@ -3444,13 +3444,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
- case X86::BI__builtin_ia32_extract128i256: // _mm256_extracti128
- case X86::BI__builtin_ia32_vextractf128_pd256: // _mm256_extractf128_ps
- case X86::BI__builtin_ia32_vextractf128_ps256: // _mm256_extractf128_pd
- case X86::BI__builtin_ia32_vextractf128_si256: // _mm256_extracti128_si256
+ case X86::BI__builtin_ia32_extract128i256: // _mm256_extracti128
+ case X86::BI__builtin_ia32_vextractf128_pd256: // _mm256_extractf128_ps
+ case X86::BI__builtin_ia32_vextractf128_ps256: // _mm256_extractf128_pd
+ case X86::BI__builtin_ia32_vextractf128_si256: // _mm256_extracti128_si256
return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID);
- // AVX-512 / AVX-512VL / AVX-512DQ
+ // AVX-512 / AVX-512VL / AVX-512DQ
case X86::BI__builtin_ia32_extractf32x4_256_mask:
case X86::BI__builtin_ia32_extractf32x4_mask:
case X86::BI__builtin_ia32_extractf32x8_mask:
@@ -3465,7 +3465,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_extracti64x4_mask:
return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID);
-
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
case clang::X86::BI__builtin_ia32_pavgb256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 281f9a360..510011d0f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12037,56 +12037,59 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceImm))
return false;
-
+
if (!SourceVec.isVector())
return false;
const auto *RetVT = E->getType()->castAs<VectorType>();
- if (!RetVT) return false;
+ if (!RetVT)
+ return false;
unsigned RetLen = RetVT->getNumElements();
unsigned SrcLen = SourceVec.getVectorLength();
- if (SrcLen != RetLen * 2)
+ if (SrcLen != RetLen * 2)
return false;
unsigned Idx = SourceImm.getInt().getZExtValue() & 1;
-
+
SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(RetLen);
for (unsigned I = 0; I < RetLen; I++)
ResultElements.push_back(SourceVec.getVectorElt(Idx * RetLen + I));
-
+
return Success(APValue(ResultElements.data(), RetLen), E);
}
- case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
case X86::BI__builtin_ia32_extractf32x4_256_mask:
- case X86::BI__builtin_ia32_extracti32x4_mask:
- case X86::BI__builtin_ia32_extractf32x4_mask:
- case X86::BI__builtin_ia32_extracti32x8_mask:
- case X86::BI__builtin_ia32_extractf32x8_mask:
- case X86::BI__builtin_ia32_extracti64x2_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_256_mask:
- case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x4_mask:
- case X86::BI__builtin_ia32_extractf64x4_mask:{
+ case X86::BI__builtin_ia32_extractf64x4_mask: {
APValue SourceVec, MergeVec;
APSInt Imm, MaskImm;
- if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
- !EvaluateInteger(E->getArg(1), Imm, Info) ||
- !EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
- !EvaluateInteger(E->getArg(3), MaskImm, Info))
- return false;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceVec) ||
+ !EvaluateInteger(E->getArg(1), Imm, Info) ||
+ !EvaluateAsRValue(Info, E->getArg(2), MergeVec) ||
+ !EvaluateInteger(E->getArg(3), MaskImm, Info))
+ return false;
const auto *RetVT = E->getType()->castAs<VectorType>();
unsigned RetLen = RetVT->getNumElements();
- if (!SourceVec.isVector() || !MergeVec.isVector()) return false;
+ if (!SourceVec.isVector() || !MergeVec.isVector())
+ return false;
unsigned SrcLen = SourceVec.getVectorLength();
- if (!SrcLen || !RetLen || (SrcLen % RetLen) != 0) return false;
+ if (!SrcLen || !RetLen || (SrcLen % RetLen) != 0)
+ return false;
unsigned Lanes = SrcLen / RetLen;
unsigned Lane = static_cast<unsigned>(Imm.getZExtValue() % Lanes);
@@ -12099,11 +12102,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if ((Mask >> I) & 1)
ResultElements.push_back(SourceVec.getVectorElt(Base + I));
else
- ResultElements.push_back(MergeVec.getVectorElt(I));
+ ResultElements.push_back(MergeVec.getVectorElt(I));
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
-
case X86::BI__builtin_ia32_vpshldd128:
case X86::BI__builtin_ia32_vpshldd256:
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 0ff776b36..b2fb02ab1 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1212,10 +1212,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8di)_mm512_setzero_si512());
}
-#define _mm512_extractf32x8_ps(A, imm) \
- ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1))
+#define _mm512_extractf32x8_ps(A, imm) \
+ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
@@ -1227,11 +1227,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U)))
-#define _mm512_extractf64x2_pd(A, imm) \
- ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1))
+#define _mm512_extractf64x2_pd(A, imm) \
+ ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
+ (__v8df)(__m512d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
@@ -1245,10 +1244,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U)))
-#define _mm512_extracti32x8_epi32(A, imm) \
- ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1))
+#define _mm512_extracti32x8_epi32(A, imm) \
+ ((__m256i)__builtin_ia32_extracti32x8_mask( \
+ (__v16si)(__m512i)(A), (int)(imm), (__v8si)_mm256_setzero_si256(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
@@ -1260,11 +1259,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
(__v8si)_mm256_setzero_si256(), \
(__mmask8)(U)))
-#define _mm512_extracti64x2_epi64(A, imm) \
- ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm512_extracti64x2_epi64(A, imm) \
+ ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
+ (__v8di)(__m512i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 2768a5bae..13370aaef 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3164,10 +3164,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
(__v16si)_mm512_setzero_si512()))
/* Vector Extract */
-#define _mm512_extractf64x4_pd(A, I) \
- ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1))
+#define _mm512_extractf64x4_pd(A, I) \
+ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
@@ -3179,10 +3179,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
(__v4df)_mm256_setzero_pd(), \
(__mmask8)(U)))
-#define _mm512_extractf32x4_ps(A, I) \
- ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1))
+#define _mm512_extractf32x4_ps(A, I) \
+ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8) - 1))
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
@@ -7105,10 +7105,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
__builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
-#define _mm512_extracti32x4_epi32(A, imm) \
- ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm512_extracti32x4_epi32(A, imm) \
+ ((__m128i)__builtin_ia32_extracti32x4_mask( \
+ (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
@@ -7120,10 +7120,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
(__v4si)_mm_setzero_si128(), \
(__mmask8)(U)))
-#define _mm512_extracti64x4_epi64(A, imm) \
+#define _mm512_extracti64x4_epi64(A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)-1))
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8) - 1))
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 2d3c4b551..8aded1c47 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -1072,11 +1072,10 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
(__v4di)_mm256_setzero_si256());
}
-#define _mm256_extractf64x2_pd(A, imm) \
- ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1))
+#define _mm256_extractf64x2_pd(A, imm) \
+ ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
+ (__v4df)(__m256d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
+ (__mmask8) - 1))
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
@@ -1090,11 +1089,10 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U)))
-#define _mm256_extracti64x2_epi64(A, imm) \
- ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm256_extracti64x2_epi64(A, imm) \
+ ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
+ (__v4di)(__m256i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 252fb1119..eefeb1dad 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7606,11 +7606,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
__builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
}
-#define _mm256_extractf32x4_ps(A, imm) \
- ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1))
+#define _mm256_extractf32x4_ps(A, imm) \
+ ((__m128)__builtin_ia32_extractf32x4_256_mask( \
+ (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
+ (__mmask8) - 1))
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
@@ -7624,11 +7623,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U)))
-#define _mm256_extracti32x4_epi32(A, imm) \
- ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)-1))
+#define _mm256_extracti32x4_epi32(A, imm) \
+ ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
+ (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
+ (__mmask8) - 1))
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
``````````
</details>
https://github.com/llvm/llvm-project/pull/158853
More information about the llvm-commits
mailing list