[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow insertps intrinsic to be used in constexpr (PR #165513)
Ahmed Nour via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 30 11:42:20 PDT 2025
https://github.com/ahmednoursphinx updated https://github.com/llvm/llvm-project/pull/165513
>From eca52c0e1d9e79911f79d7339926e295fcbcf84f Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Tue, 28 Oct 2025 23:49:56 +0300
Subject: [PATCH 1/5] feat: VectorExprEvaluator::VisitCallExpr /
InterpretBuiltin - Allow insertps intrinsic to be used in constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 5 ++-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 40 +++++++++++++++++++++---
clang/lib/AST/ExprConstant.cpp | 39 ++++++++++++++++++++---
clang/test/CodeGen/X86/sse41-builtins.c | 10 ++++++
4 files changed, 85 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 0c85e280e748b..a431fc36b41c1 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -327,8 +327,11 @@ let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorW
}
}
-let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
+}
+
+let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8f23001ea5a39..b1f0832860476 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3358,7 +3358,8 @@ static bool interp__builtin_x86_byteshift(
static bool interp__builtin_ia32_shuffle_generic(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
- GetSourceIndex) {
+ GetSourceIndex,
+ llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) {
assert(Call->getNumArgs() == 3);
unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
@@ -3373,9 +3374,20 @@ static bool interp__builtin_ia32_shuffle_generic(
const Pointer &Dst = S.Stk.peek<Pointer>();
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
- auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
- const Pointer &Src = (SrcVecIdx == 0) ? A : B;
- TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
+ if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) {
+ // Zero out this element
+ if (ElemT == PT_Float) {
+ Dst.elem<Floating>(DstIdx) = Floating(S.getASTContext().getFloatTypeSemantics(VecT->getElementType()));
+ } else {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ Dst.elem<T>(DstIdx) = T::from(0);
+ });
+ }
+ } else {
+ auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
+ const Pointer &Src = (SrcVecIdx == 0) ? A : B;
+ TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
+ }
}
Dst.initializeAllElements();
@@ -4348,6 +4360,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
});
+ case X86::BI__builtin_ia32_insertps128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call,
+ [](unsigned DstIdx, unsigned Mask) {
+ // Bits [7:6]: select element from source vector Y (0-3)
+ // Bits [5:4]: select destination position (0-3)
+ unsigned SrcElem = (Mask >> 6) & 0x3;
+ unsigned DstElem = (Mask >> 4) & 0x3;
+ if (DstIdx == DstElem) {
+ // Insert element from source vector (B) at this position
+ return std::pair<unsigned, unsigned>{1, SrcElem};
+ } else {
+ // Copy from destination vector (A)
+ return std::pair<unsigned, unsigned>{0, DstIdx};
+ }
+ },
+ [](unsigned DstIdx, unsigned Mask) {
+ // Bits [3:0]: zero mask
+ return (Mask & (1 << DstIdx)) != 0;
+ });
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
case X86::BI__builtin_ia32_pshufb512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 29ee089505125..17c966b8c9f4c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11622,7 +11622,8 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
static bool evalShuffleGeneric(
EvalInfo &Info, const CallExpr *Call, APValue &Out,
llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
- GetSourceIndex) {
+ GetSourceIndex,
+ llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) {
const auto *VT = Call->getType()->getAs<VectorType>();
if (!VT)
@@ -11643,9 +11644,15 @@ static bool evalShuffleGeneric(
ResultElements.reserve(NumElts);
for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
- auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
- const APValue &Src = (SrcVecIdx == 0) ? A : B;
- ResultElements.push_back(Src.getVectorElt(SrcIdx));
+ if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) {
+ // Zero out this element
+ QualType ElemTy = VT->getElementType();
+ ResultElements.push_back(APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
+ } else {
+ auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
+ const APValue &Src = (SrcVecIdx == 0) ? A : B;
+ ResultElements.push_back(Src.getVectorElt(SrcIdx));
+ }
}
Out = APValue(ResultElements.data(), ResultElements.size());
@@ -12481,6 +12488,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_insertps128: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, unsigned> {
+ // Bits [7:6]: select element from source vector Y (0-3)
+ // Bits [5:4]: select destination position (0-3)
+ unsigned SrcElem = (Mask >> 6) & 0x3;
+ unsigned DstElem = (Mask >> 4) & 0x3;
+ if (DstIdx == DstElem) {
+ // Insert element from source vector (B) at this position
+ return {1, SrcElem};
+ } else {
+ // Copy from destination vector (A)
+ return {0, DstIdx};
+ }
+ },
+ [](unsigned DstIdx, unsigned Mask) -> bool {
+ // Bits [3:0]: zero mask
+ return (Mask & (1 << DstIdx)) != 0;
+ }))
+ return false;
+ return Success(R, E);
+ }
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
case X86::BI__builtin_ia32_pshufb512: {
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 62cd392824bb2..35fa65a99836b 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) {
return _mm_insert_ps(x, y, 4);
}
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3]
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all
+TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all
+
__m128i test_mm_max_epi8(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_max_epi8
// CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
>From 578e7f7152c4ed04c9008d02f17e10b63930840b Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Wed, 29 Oct 2025 10:36:13 +0300
Subject: [PATCH 2/5] chore: apply formatting
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 +++----
clang/lib/AST/ExprConstant.cpp | 6 ++++--
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c4d92804d9c4a..117b10b1c6c0b 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3431,11 +3431,10 @@ static bool interp__builtin_ia32_shuffle_generic(
if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) {
// Zero out this element
if (ElemT == PT_Float) {
- Dst.elem<Floating>(DstIdx) = Floating(S.getASTContext().getFloatTypeSemantics(VecT->getElementType()));
+ Dst.elem<Floating>(DstIdx) = Floating(
+ S.getASTContext().getFloatTypeSemantics(VecT->getElementType()));
} else {
- INT_TYPE_SWITCH_NO_BOOL(ElemT, {
- Dst.elem<T>(DstIdx) = T::from(0);
- });
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
}
} else {
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d27e923949c1f..d406ac52f1121 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11647,7 +11647,8 @@ static bool evalShuffleGeneric(
if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) {
// Zero out this element
QualType ElemTy = VT->getElementType();
- ResultElements.push_back(APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
+ ResultElements.push_back(
+ APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
} else {
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const APValue &Src = (SrcVecIdx == 0) ? A : B;
@@ -12492,7 +12493,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R,
- [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, unsigned> {
+ [](unsigned DstIdx,
+ unsigned Mask) -> std::pair<unsigned, unsigned> {
// Bits [7:6]: select element from source vector Y (0-3)
// Bits [5:4]: select destination position (0-3)
unsigned SrcElem = (Mask >> 6) & 0x3;
>From 11e06ea5c30034288dd2ec84f6a178523b5c5202 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 30 Oct 2025 16:24:23 +0300
Subject: [PATCH 3/5] chore: PR Feedback
---
clang/include/clang/Basic/BuiltinsX86.td | 5 +----
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++------------
2 files changed, 14 insertions(+), 17 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b6cb475e25ab1..d9e9c91b8141b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -327,10 +327,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorW
}
}
-let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
- def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-}
-
let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
@@ -345,6 +341,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
let Features = "sse4.1",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def ptestz128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestc128
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 117b10b1c6c0b..a0f0a1c11607d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3411,9 +3411,8 @@ static bool interp__builtin_x86_byteshift(
static bool interp__builtin_ia32_shuffle_generic(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
- llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
- GetSourceIndex,
- llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) {
+ llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
+ GetSourceIndex) {
assert(Call->getNumArgs() == 3);
unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
@@ -3428,7 +3427,9 @@ static bool interp__builtin_ia32_shuffle_generic(
const Pointer &Dst = S.Stk.peek<Pointer>();
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
- if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) {
+ auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
+
+ if (SrcIdx < 0) {
// Zero out this element
if (ElemT == PT_Float) {
Dst.elem<Floating>(DstIdx) = Floating(
@@ -3437,7 +3438,6 @@ static bool interp__builtin_ia32_shuffle_generic(
INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
}
} else {
- auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const Pointer &Src = (SrcVecIdx == 0) ? A : B;
TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
}
@@ -4393,7 +4393,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)};
});
case X86::BI__builtin_ia32_shufpd:
case X86::BI__builtin_ia32_shufpd256:
@@ -4411,27 +4411,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)};
});
case X86::BI__builtin_ia32_insertps128:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call,
[](unsigned DstIdx, unsigned Mask) {
+ // Bits [3:0]: zero mask - if bit is set, zero this element
+ if ((Mask & (1 << DstIdx)) != 0) {
+ return std::pair<unsigned, int>{0, -1};
+ }
// Bits [7:6]: select element from source vector Y (0-3)
// Bits [5:4]: select destination position (0-3)
unsigned SrcElem = (Mask >> 6) & 0x3;
unsigned DstElem = (Mask >> 4) & 0x3;
if (DstIdx == DstElem) {
// Insert element from source vector (B) at this position
- return std::pair<unsigned, unsigned>{1, SrcElem};
+ return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)};
} else {
// Copy from destination vector (A)
- return std::pair<unsigned, unsigned>{0, DstIdx};
+ return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
}
- },
- [](unsigned DstIdx, unsigned Mask) {
- // Bits [3:0]: zero mask
- return (Mask & (1 << DstIdx)) != 0;
});
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
>From b1b98bd0ab4f54ab533b6db9f1d90a69977776eb Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 30 Oct 2025 16:30:40 +0300
Subject: [PATCH 4/5] chore: Format files
---
clang/include/clang/Basic/BuiltinsX86.td | 3 ++-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 11 ++++++-----
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index d9e9c91b8141b..9e877b92eac68 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -341,7 +341,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
let Features = "sse4.1",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
- def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
+ def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, "
+ "_Vector<4, float>, _Constant char)">;
def ptestz128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestc128
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a0f0a1c11607d..31b48172401f9 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3428,7 +3428,7 @@ static bool interp__builtin_ia32_shuffle_generic(
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
-
+
if (SrcIdx < 0) {
// Zero out this element
if (ElemT == PT_Float) {
@@ -4393,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)};
+ return std::pair<unsigned, int>{SrcIdx,
+ static_cast<int>(LaneOffset + Index)};
});
case X86::BI__builtin_ia32_shufpd:
case X86::BI__builtin_ia32_shufpd256:
@@ -4411,12 +4412,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return std::pair<unsigned, int>{SrcIdx, static_cast<int>(LaneOffset + Index)};
+ return std::pair<unsigned, int>{SrcIdx,
+ static_cast<int>(LaneOffset + Index)};
});
case X86::BI__builtin_ia32_insertps128:
return interp__builtin_ia32_shuffle_generic(
- S, OpPC, Call,
- [](unsigned DstIdx, unsigned Mask) {
+ S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) {
// Bits [3:0]: zero mask - if bit is set, zero this element
if ((Mask & (1 << DstIdx)) != 0) {
return std::pair<unsigned, int>{0, -1};
>From d0d22764405032d89c142da31e2feee300b8af05 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 30 Oct 2025 19:11:27 +0300
Subject: [PATCH 5/5] chore: PR Feedback
---
clang/lib/AST/ExprConstant.cpp | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d406ac52f1121..97eeba8b9d6cc 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11621,9 +11621,8 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
static bool evalShuffleGeneric(
EvalInfo &Info, const CallExpr *Call, APValue &Out,
- llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
- GetSourceIndex,
- llvm::function_ref<bool(unsigned, unsigned)> ShouldZero = nullptr) {
+ llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
+ GetSourceIndex) {
const auto *VT = Call->getType()->getAs<VectorType>();
if (!VT)
@@ -11644,13 +11643,14 @@ static bool evalShuffleGeneric(
ResultElements.reserve(NumElts);
for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
- if (ShouldZero && ShouldZero(DstIdx, ShuffleMask)) {
+ auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
+
+ if (SrcIdx < 0) {
// Zero out this element
QualType ElemTy = VT->getElementType();
ResultElements.push_back(
APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
} else {
- auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const APValue &Src = (SrcVecIdx == 0) ? A : B;
ResultElements.push_back(Src.getVectorElt(SrcIdx));
}
@@ -12446,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx,
- unsigned ShuffleMask) -> std::pair<unsigned, unsigned> {
+ unsigned ShuffleMask) -> std::pair<unsigned, int> {
constexpr unsigned LaneBits = 128u;
unsigned NumElemPerLane = LaneBits / 32;
unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12459,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return {SrcIdx, LaneOffset + Index};
+ return {SrcIdx, static_cast<int>(LaneOffset + Index)};
}))
return false;
return Success(R, E);
@@ -12471,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx,
- unsigned ShuffleMask) -> std::pair<unsigned, unsigned> {
+ unsigned ShuffleMask) -> std::pair<unsigned, int> {
constexpr unsigned LaneBits = 128u;
unsigned NumElemPerLane = LaneBits / 64;
unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12484,7 +12484,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
- return {SrcIdx, LaneOffset + Index};
+ return {SrcIdx, static_cast<int>(LaneOffset + Index)};
}))
return false;
return Success(R, E);
@@ -12493,23 +12493,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R,
- [](unsigned DstIdx,
- unsigned Mask) -> std::pair<unsigned, unsigned> {
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ // Bits [3:0]: zero mask - if bit is set, zero this element
+ if ((Mask & (1 << DstIdx)) != 0) {
+ return {0, -1};
+ }
// Bits [7:6]: select element from source vector Y (0-3)
// Bits [5:4]: select destination position (0-3)
unsigned SrcElem = (Mask >> 6) & 0x3;
unsigned DstElem = (Mask >> 4) & 0x3;
if (DstIdx == DstElem) {
// Insert element from source vector (B) at this position
- return {1, SrcElem};
+ return {1, static_cast<int>(SrcElem)};
} else {
// Copy from destination vector (A)
- return {0, DstIdx};
+ return {0, static_cast<int>(DstIdx)};
}
- },
- [](unsigned DstIdx, unsigned Mask) -> bool {
- // Bits [3:0]: zero mask
- return (Mask & (1 << DstIdx)) != 0;
}))
return false;
return Success(R, E);
More information about the cfe-commits mailing list