[clang] [X86] Add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr (PR #156822)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 26 00:00:42 PDT 2025
================
@@ -2742,6 +2742,86 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
return true;
}
+/// Constant-evaluates an x86 horizontal integer binary builtin (the
+/// PHADD/PHSUB family) taking two integer vector operands of equal length.
+///
+/// \p Fn is applied to every pair of adjacent source elements (2*i, 2*i+1).
+/// The results are written to the destination vector one 128-bit lane at a
+/// time: for each lane, first the pair results of that lane of the first
+/// operand, then the pair results of the same lane of the second operand.
+/// For vectors of at most 128 bits this is simply "all LHS pairs, then all
+/// RHS pairs"; for 256-bit vectors it matches the per-lane behaviour of the
+/// AVX2 instructions instead of concatenating whole operands.
+///
+/// \param S    Interpreter state; the two operands are popped from its stack
+///             and the destination pointer is left on it.
+/// \param OpPC Unused here.
+/// \param Call The builtin call being evaluated (must have two vector args).
+/// \param Fn   Combines the two elements of one pair into a result element.
+/// \returns true (the evaluation itself cannot fail).
+static bool interp_builtin_horizontal_int_binop(
+    InterpState &S, CodePtr OpPC, const CallExpr *Call,
+    llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
+  assert(Call->getNumArgs() == 2);
+
+  assert(Call->getArg(0)->getType()->isVectorType() &&
+         Call->getArg(1)->getType()->isVectorType());
+  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+  assert(VT->getElementType()->isIntegralOrEnumerationType());
+  PrimType ElemT = *S.getContext().classify(VT->getElementType());
+  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+  // The second operand is on top of the stack, the first one below it; the
+  // destination is only peeked so it remains on the stack as the result.
+  const Pointer &RHS = S.Stk.pop<Pointer>();
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  unsigned SourceLen = VT->getNumElements();
+  assert(SourceLen % 2 == 0 &&
+         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+             SourceLen);
+
+  // Number of source elements that make up one 128-bit lane. Vectors that are
+  // not wider than 128 bits are handled as a single lane.
+  unsigned ElemBits = S.getASTContext().getIntWidth(VT->getElementType());
+  assert(ElemBits != 0);
+  unsigned ElemsPer128 = 128 / ElemBits;
+  unsigned LaneLen = SourceLen;
+  if (ElemsPer128 >= 2 && ElemsPer128 < SourceLen)
+    LaneLen = ElemsPer128;
+  assert(LaneLen != 0 && LaneLen % 2 == 0 && SourceLen % LaneLen == 0);
+
+  const Pointer *Sources[] = {&LHS, &RHS};
+  unsigned DstElem = 0;
+  for (unsigned LaneStart = 0; LaneStart < SourceLen; LaneStart += LaneLen) {
+    // Within a lane the LHS pair results come first, then the RHS ones.
+    for (const Pointer *Src : Sources) {
+      for (unsigned I = LaneStart, E = LaneStart + LaneLen; I != E; I += 2) {
+        INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+          APSInt Elem1 = Src->elem<T>(I).toAPSInt();
+          APSInt Elem2 = Src->elem<T>(I + 1).toAPSInt();
+          Dst.elem<T>(DstElem) =
+              static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
+        });
+        ++DstElem;
+      }
+    }
+  }
+  // Every destination element has been written exactly once above.
+  assert(DstElem == SourceLen);
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp_builtin_horizontal_fp_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APFloat(const APFloat &, const APFloat &,
+ llvm::RoundingMode)>
+ Fn) {
+ assert(Call->getNumArgs() == 2);
+ assert(Call->getArg(0)->getType()->isVectorType() &&
+ Call->getArg(1)->getType()->isVectorType());
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
+ llvm::RoundingMode RM = getRoundingMode(FPO);
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ unsigned SourceLen = VT->getNumElements();
+ assert(SourceLen % 2 == 0 &&
+ Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ SourceLen);
+ unsigned DstElem = 0;
+ for (unsigned I = 0; I != SourceLen; I += 2) {
+ using T = PrimConv<PT_Float>::T;
+ APFloat Elem1 = LHS.elem<T>(I).getAPFloat();
+ APFloat Elem2 = LHS.elem<T>(I + 1).getAPFloat();
+ Dst.elem<T>(DstElem++) = static_cast<T>(APFloat(Fn(Elem1, Elem2, RM)));
----------------
tbaederr wrote:
`Fn` already returns an `APFloat`, doesn't it? Why the extra constructor call here?
https://github.com/llvm/llvm-project/pull/156822
More information about the cfe-commits mailing list