[clang] [Headers][X86] Allow AVX512 masked arithmetic ss/sd intrinsics to be used in constexpr (PR #162816)
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 10 06:20:42 PST 2025
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/162816
>From a14bb33c6b9bc57b1a3019e22e34ab88f68bc4da Mon Sep 17 00:00:00 2001
From: liuzhenya <zyliu at siorigin.com>
Date: Sun, 9 Nov 2025 22:22:50 -1000
Subject: [PATCH 1/2] [Headers][X86] Allow AVX512 masked arithmetic ss/sd
intrinsics to be used in constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 2 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 24 +++++++
clang/lib/AST/ExprConstant.cpp | 22 +++++++
clang/lib/Headers/avx512fintrin.h | 64 +++++++++----------
clang/test/CodeGen/X86/avx512f-builtins.c | 32 ++++++++++
clang/test/CodeGen/X86/builtin_test_helpers.h | 10 +++
6 files changed, 121 insertions(+), 33 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index edff241a98738..ee1060dff5497 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4128,7 +4128,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ef130c0a55df..0f0e3e87b1e8a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2419,6 +2419,27 @@ static bool interp__builtin_elementwise_int_unaryop(
return false;
}
+static bool interp__builtin_select_scalar(InterpState &S,
+ const CallExpr *Call) {
+ unsigned N =
+ Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements();
+
+ const Pointer &W = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ APSInt U = popToAPSInt(S, Call->getArg(0));
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ bool TakeA0 = U.getZExtValue() & 1ULL;
+
+ for (unsigned I = 0; I < N; ++I)
+ Dst.elem<Floating>(I) = W.elem<Floating>(I);
+ if (TakeA0)
+ Dst.elem<Floating>(0) = A.elem<Floating>(0);
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_elementwise_int_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
@@ -4205,6 +4226,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return APInt::getAllOnes(DstBits);
});
+ case clang::X86::BI__builtin_ia32_selectss_128:
+ case clang::X86::BI__builtin_ia32_selectsd_128:
+ return interp__builtin_select_scalar(S, Call);
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 972d9fe3b5e4f..2f11b1d23f9a3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12264,6 +12264,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), SourceLen), E);
};
+ auto EvalSelectScalar = [&](unsigned Len) -> bool {
+ APSInt Mask;
+ APValue AVal, WVal;
+ if (!EvaluateInteger(E->getArg(0), Mask, Info) ||
+ !EvaluateAsRValue(Info, E->getArg(1), AVal) ||
+ !EvaluateAsRValue(Info, E->getArg(2), WVal))
+ return false;
+
+ bool TakeA0 = (Mask.getZExtValue() & 1u) != 0;
+ SmallVector<APValue, 4> Res;
+ Res.reserve(Len);
+ Res.push_back(TakeA0 ? AVal.getVectorElt(0) : WVal.getVectorElt(0));
+ for (unsigned i = 1; i < Len; ++i)
+ Res.push_back(WVal.getVectorElt(i));
+ APValue V(Res.data(), Res.size());
+ return Success(V, E);
+ };
+
switch (E->getBuiltinCallee()) {
default:
return false;
@@ -12567,6 +12585,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return APInt((Src).trunc(DstBits));
return APInt::getAllOnes(DstBits);
});
+ case clang::X86::BI__builtin_ia32_selectss_128:
+ return EvalSelectScalar(4);
+ case clang::X86::BI__builtin_ia32_selectsd_128:
+ return EvalSelectScalar(2);
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..5a9e045ac3add 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1834,14 +1834,14 @@ _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) {
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -1864,14 +1864,14 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
@@ -1949,14 +1949,14 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -1978,14 +1978,14 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
@@ -2064,14 +2064,14 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -2093,14 +2093,14 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
@@ -2179,14 +2179,14 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -2209,14 +2209,14 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 17778b52d3671..26296f474466a 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3302,6 +3302,8 @@ __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_add_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_mask_add_ss((__m128)(__v4sf){10.0f, 100.0f, 200.0f, 300.0f}, 0x1,(__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f},(__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}),4.0f, 100.0f, 200.0f, 300.0f));
+
__m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_add_ss
// CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round
@@ -3317,6 +3319,8 @@ __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_add_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_maskz_add_ss(0x1, (__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}), 4.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_add_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_add_round_sd
// CHECK: @llvm.x86.avx512.mask.add.sd.round
@@ -3347,6 +3351,8 @@ __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_add_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_mask_add_sd((__m128d)(__v2df){10.0, 999.0}, 0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 999.0));
+
__m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_add_sd
// CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round
@@ -3362,6 +3368,8 @@ __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_add_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_maskz_add_sd(0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 0.0));
+
__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_sub_round_pd
// CHECK: @llvm.x86.avx512.sub.pd.512
@@ -3450,6 +3458,8 @@ __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_sub_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_mask_sub_ss((__m128)(__v4sf){-1.0f, 10.0f, 20.0f, 30.0f}, 0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 10.0f, 20.0f, 30.0f));
+
__m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_sub_ss
// CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round
@@ -3465,6 +3475,8 @@ __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_sub_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_maskz_sub_ss(0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_sub_round_sd
// CHECK: @llvm.x86.avx512.mask.sub.sd.round
@@ -3495,6 +3507,8 @@ __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_sub_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_mask_sub_sd((__m128d)(__v2df){-1.0, 111.0}, 0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 111.0));
+
__m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_sub_sd
// CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round
@@ -3510,6 +3524,8 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_sub_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_maskz_sub_sd(0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 0.0));
+
__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_mul_round_pd
// CHECK: @llvm.x86.avx512.mul.pd.512
@@ -3598,6 +3614,8 @@ __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_mul_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_mask_mul_ss((__m128)(__v4sf){42.0f, -1.0f, -2.0f, -3.0f}, 0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, -1.0f, -2.0f, -3.0f));
+
__m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_mul_ss
// CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round
@@ -3613,6 +3631,8 @@ __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_mul_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_maskz_mul_ss(0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_mul_round_sd
// CHECK: @llvm.x86.avx512.mask.mul.sd.round
@@ -3643,6 +3663,8 @@ __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_mul_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_mask_mul_sd((__m128d)(__v2df){123.0, -9.0}, 0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, -9.0));
+
__m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_mul_sd
// CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round
@@ -3658,6 +3680,8 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_mul_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_maskz_mul_sd(0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, 0.0));
+
__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_div_round_pd
// CHECK: @llvm.x86.avx512.div.pd.512
@@ -3757,6 +3781,8 @@ __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_div_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_mask_div_ss((__m128)(__v4sf){-7.0f, 5.0f, 6.0f, 7.0f}, 0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 5.0f, 6.0f, 7.0f));
+
__m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_div_ss
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
@@ -3771,6 +3797,8 @@ __m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_div_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v4sf(_mm_maskz_div_ss(0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_div_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_div_round_sd
// CHECK: @llvm.x86.avx512.mask.div.sd.round
@@ -3800,6 +3828,8 @@ __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_div_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_mask_div_sd((__m128d)(__v2df){-8.0, 44.0}, 0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 44.0));
+
__m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_div_sd
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -3814,6 +3844,8 @@ __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_div_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v2df(_mm_maskz_div_sd(0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 0.0));
+
__m128 test_mm_max_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_max_round_ss
// CHECK: @llvm.x86.avx512.mask.max.ss.round
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index fcaf360626a2d..a0ef6d3028aa9 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -80,6 +80,11 @@ constexpr bool match_v2du(__m128i _v, unsigned long long a, unsigned long long b
return v[0] == a && v[1] == b;
}
+constexpr bool match_v2df(__m128d _v, double a, double b){
+ __v2df v = (__v2df)_v;
+ return v[0] == a && v[1] == b;
+}
+
constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
__v4si v = (__v4si)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
@@ -90,6 +95,11 @@ constexpr bool match_v4su(__m128i _v, unsigned a, unsigned b, unsigned c, unsign
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
}
+constexpr bool match_v4sf(__m128 _v, float a, float b, float c, float d) {
+ __v4sf v = (__v4sf)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, short e, short f, short g, short h) {
__v8hi v = (__v8hi)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
>From 679c6e69949eb062da9a0ba6aa2b588342c95bb8 Mon Sep 17 00:00:00 2001
From: liuzhenya <zyliu at siorigin.com>
Date: Sun, 9 Nov 2025 22:25:54 -1000
Subject: [PATCH 2/2] fix: format
---
clang/lib/AST/ExprConstant.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 2f11b1d23f9a3..9724237b715cf 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12585,7 +12585,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return APInt((Src).trunc(DstBits));
return APInt::getAllOnes(DstBits);
});
- case clang::X86::BI__builtin_ia32_selectss_128:
+ case clang::X86::BI__builtin_ia32_selectss_128:
return EvalSelectScalar(4);
case clang::X86::BI__builtin_ia32_selectsd_128:
return EvalSelectScalar(2);
More information about the cfe-commits
mailing list