[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow SSE41 phminposuw intrinsic to be used in constexpr (PR #163041)
Shawn K via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 14 12:34:27 PDT 2025
https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/163041
>From 13c9d31683a39b60f02c56cd512e86fcf39d7389 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 14 Oct 2025 11:16:21 -0700
Subject: [PATCH 1/3] Squash
---
clang/include/clang/Basic/BuiltinsX86.td | 5 +--
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 42 ++++++++++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 34 +++++++++++++++++++
clang/lib/Headers/smmintrin.h | 3 +-
clang/test/CodeGen/X86/sse41-builtins.c | 10 ++++++
5 files changed, 91 insertions(+), 3 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 006a45347ff1a..5aec4138487b7 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -334,8 +334,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
"_Vector<2,double>, _Constant char)">;
- def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
- def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
+ def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, "
+ "_Vector<16, char>, _Constant char)">;
}
let Features = "sse4.1",
@@ -358,6 +358,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
+ def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2d3cb6a68d7e2..c61c828be1a50 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2975,6 +2975,45 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 1);
+
+ const Pointer &Source = S.Stk.pop<Pointer>();
+ const Pointer &Dest = S.Stk.peek<Pointer>();
+
+ unsigned SourceLen = Source.getNumElems();
+ QualType ElemQT = getElemType(Source);
+ OptPrimType ElemT = S.getContext().classify(ElemQT);
+ unsigned LaneBitWidth = S.getASTContext().getTypeSize(ElemQT);
+
+ bool DestUnsigned = Call->getCallReturnType(S.getASTContext())
+ ->castAs<VectorType>()
+ ->getElementType()
+ ->isUnsignedIntegerOrEnumerationType();
+
+ INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
+ APSInt MinIndex(LaneBitWidth, DestUnsigned);
+ APSInt MinVal = Source.elem<T>(0).toAPSInt();
+
+ for (unsigned I = 1; I != SourceLen; ++I) {
+ APSInt Val = Source.elem<T>(I).toAPSInt();
+ if (MinVal.ugt(Val)) {
+ MinVal = Val;
+ MinIndex = I;
+ }
+ }
+
+ Dest.elem<T>(0) = static_cast<T>(MinVal);
+ Dest.elem<T>(1) = static_cast<T>(MinIndex);
+ for (unsigned I = 2; I != SourceLen; ++I) {
+ Dest.elem<T>(I) = static_cast<T>(APSInt(LaneBitWidth, DestUnsigned));
+ }
+ });
+ Dest.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool MaskZ) {
assert(Call->getNumArgs() == 5);
@@ -4013,6 +4052,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case X86::BI__builtin_ia32_phminposuw128:
+ return interp__builtin_ia32_phminposuw(S, OpPC, Call);
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 51c038274fd36..95c4a9422385d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12268,6 +12268,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_phminposuw128: {
+ APValue Source;
+ if (!Evaluate(Source, Info, E->getArg(0)))
+ return false;
+ unsigned SourceLen = Source.getVectorLength();
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ QualType ElemQT = VT->getElementType();
+ unsigned LaneBitWidth = Info.Ctx.getTypeSize(ElemQT);
+
+ APInt MinIndex(LaneBitWidth, 0);
+ APInt MinVal = Source.getVectorElt(0).getInt();
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt Val = Source.getVectorElt(I).getInt();
+ if (MinVal.ugt(Val)) {
+ MinVal = Val;
+ MinIndex = I;
+ }
+ }
+
+ bool ResultUnsigned = E->getCallReturnType(Info.Ctx)
+ ->castAs<VectorType>()
+ ->getElementType()
+ ->isUnsignedIntegerOrEnumerationType();
+
+ SmallVector<APValue, 8> Result;
+ Result.reserve(SourceLen);
+ Result.emplace_back(APSInt(MinVal, ResultUnsigned));
+ Result.emplace_back(APSInt(MinIndex, ResultUnsigned));
+ for (unsigned I = 0; I != SourceLen - 2; ++I) {
+ Result.emplace_back(APSInt(APInt(LaneBitWidth, 0), ResultUnsigned));
+ }
+ return Success(APValue(Result.data(), Result.size()), E);
+ }
+
case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 4f197d5ecaff9..511a135375295 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1524,7 +1524,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) {
/// \returns A 128-bit value where bits [15:0] contain the minimum value found
/// in parameter \a __V, bits [18:16] contain the index of the minimum value
/// and the remaining bits are set to 0.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_minpos_epu16(__m128i __V) {
return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V);
}
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 89a7ac29e7db7..62cd392824bb2 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -376,6 +376,16 @@ __m128i test_mm_minpos_epu16(__m128i x) {
// CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}})
return _mm_minpos_epu16(x);
}
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1,0,0,0, 0,0,0,0}), 0,1,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){65535,65535,65535,65535,65535,65535,65535,65535}), 65535,0,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){9,8,7,6,5,4,3,2}), 2,7,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,5,5,5,5,5,5,5}), 5,0,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,7,9,4,10,4,11,12}), 4,3,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){6,0,0,0,0,0,0,0}), 0,1,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1000,2000,3000,4000,5000,6000,7000,1}), 1,7,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1234,5678,42,9999,65535,0,4242,42}), 0,5,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){400,500,12,600,12,700,800,900}), 12,2,0,0, 0,0,0,0));
__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_mpsadbw_epu8
>From 92e40071cdba5d30434944a9e56567734fd8c60c Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 14 Oct 2025 11:22:51 -0700
Subject: [PATCH 2/3] Apply feedback
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 +++---
clang/lib/AST/ExprConstant.cpp | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c61c828be1a50..b4a69a8f3e8bc 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2985,7 +2985,7 @@ static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
unsigned SourceLen = Source.getNumElems();
QualType ElemQT = getElemType(Source);
OptPrimType ElemT = S.getContext().classify(ElemQT);
- unsigned LaneBitWidth = S.getASTContext().getTypeSize(ElemQT);
+ unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT);
bool DestUnsigned = Call->getCallReturnType(S.getASTContext())
->castAs<VectorType>()
@@ -2993,7 +2993,7 @@ static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
->isUnsignedIntegerOrEnumerationType();
INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
- APSInt MinIndex(LaneBitWidth, DestUnsigned);
+ APSInt MinIndex(ElemBitWidth, DestUnsigned);
APSInt MinVal = Source.elem<T>(0).toAPSInt();
for (unsigned I = 1; I != SourceLen; ++I) {
@@ -3007,7 +3007,7 @@ static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
Dest.elem<T>(0) = static_cast<T>(MinVal);
Dest.elem<T>(1) = static_cast<T>(MinIndex);
for (unsigned I = 2; I != SourceLen; ++I) {
- Dest.elem<T>(I) = static_cast<T>(APSInt(LaneBitWidth, DestUnsigned));
+ Dest.elem<T>(I) = static_cast<T>(APSInt(ElemBitWidth, DestUnsigned));
}
});
Dest.initializeAllElements();
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 95c4a9422385d..45a9546585aec 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12275,9 +12275,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned SourceLen = Source.getVectorLength();
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
QualType ElemQT = VT->getElementType();
- unsigned LaneBitWidth = Info.Ctx.getTypeSize(ElemQT);
+ unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT);
- APInt MinIndex(LaneBitWidth, 0);
+ APInt MinIndex(ElemBitWidth, 0);
APInt MinVal = Source.getVectorElt(0).getInt();
for (unsigned I = 0; I != SourceLen; ++I) {
APInt Val = Source.getVectorElt(I).getInt();
@@ -12297,7 +12297,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
Result.emplace_back(APSInt(MinVal, ResultUnsigned));
Result.emplace_back(APSInt(MinIndex, ResultUnsigned));
for (unsigned I = 0; I != SourceLen - 2; ++I) {
- Result.emplace_back(APSInt(APInt(LaneBitWidth, 0), ResultUnsigned));
+ Result.emplace_back(APSInt(APInt(ElemBitWidth, 0), ResultUnsigned));
}
return Success(APValue(Result.data(), Result.size()), E);
}
>From 5eace3eb7462ec759ba658947c02eddea11d64db Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 14 Oct 2025 12:34:16 -0700
Subject: [PATCH 3/3] Apply feedback
---
clang/lib/AST/ExprConstant.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 45a9546585aec..d54689715c369 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12279,7 +12279,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APInt MinIndex(ElemBitWidth, 0);
APInt MinVal = Source.getVectorElt(0).getInt();
- for (unsigned I = 0; I != SourceLen; ++I) {
+ for (unsigned I = 1; I != SourceLen; ++I) {
APInt Val = Source.getVectorElt(I).getInt();
if (MinVal.ugt(Val)) {
MinVal = Val;
More information about the cfe-commits
mailing list