[clang] [Clang] Add `__builtin_reduce_[in|any]_order_fadd` for floating-point reductions (PR #176160)
Benjamin Maxwell via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 20 07:24:48 PST 2026
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/176160
>From 2fad251f3919d131495800802ea545e0aa40112b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 14 Jan 2026 17:53:39 +0000
Subject: [PATCH 1/8] [Clang] Add `__builtin_reduce_addf` for ordered/unordered
fp reductions
This adds `__builtin_reduce_addf` to expose the `llvm.vector.reduce.fadd.*`
intrinsic directly in Clang, for the full range of supported FP types.
Given a floating-point vector `vec` and a scalar floating-point value `acc`:
- `__builtin_reduce_addf(vec)` corresponds to an unordered/fast reduction
* i.e., the lanes can be summed in any order
- `__builtin_reduce_addf(vec, acc)` corresponds to an ordered reduction
* i.e., the result is as if an accumulator was initialized with `acc`
and each lane was added to it in order, starting from lane 0
The `acc` is only used for ordered reductions as the original motivation
for adding the "start_value/acc" in the intrinsic was to distinguish
between ordered/unordered reductions, see: https://reviews.llvm.org/D30086.
---
clang/docs/LanguageExtensions.rst | 4 ++
clang/include/clang/Basic/Builtins.td | 6 +++
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 1 +
clang/lib/CodeGen/CGBuiltin.cpp | 22 ++++++++
clang/lib/Sema/SemaChecking.cpp | 53 +++++++++++++++++---
clang/test/CodeGen/builtins-reduction-math.c | 23 +++++++++
clang/test/Sema/builtins-reduction-math.c | 17 +++++++
7 files changed, 119 insertions(+), 7 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 29328355c3e6f..2109679d20dcd 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -946,6 +946,10 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
for the comparison.
+ ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types
+ ET __builtin_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types
+ with `(ET)s`, then adding each lane of the `a` in-order, starting from
+ lane 0.
======================================= ====================================================================== ==================================
*Masked Builtins*
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 78dd26aa2c455..182ec64533f26 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1664,6 +1664,12 @@ def ReduceAdd : Builtin {
let Prototype = "void(...)";
}
+def ReduceAddf : Builtin {
+ let Spellings = ["__builtin_reduce_addf"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ReduceMul : Builtin {
let Spellings = ["__builtin_reduce_mul"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 85c7d2cd5c489..87c57ab0cfbdb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1517,6 +1517,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__builtin_reduce_xor:
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and:
+ case Builtin::BI__builtin_reduce_addf:
case Builtin::BI__builtin_reduce_maximum:
case Builtin::BI__builtin_reduce_minimum:
case Builtin::BI__builtin_matrix_transpose:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 850cc8d2c4c45..32fe47caaa97e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4215,6 +4215,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_minimum:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
+ case Builtin::BI__builtin_reduce_addf: {
+ llvm::Value *Vector = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ScalarTy = Vector->getType()->getScalarType();
+ llvm::Value *StartValue = nullptr;
+ if (E->getNumArgs() == 2)
+ StartValue = Builder.CreateFPCast(EmitScalarExpr(E->getArg(1)), ScalarTy);
+ llvm::Value *Args[] = {/*start_value=*/StartValue
+ ? StartValue
+ : llvm::ConstantFP::get(ScalarTy, -0.0F),
+ /*vector=*/Vector};
+ llvm::Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
+ llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
+ if (!StartValue) {
+ // No start value means an unordered reduction, which requires the reassoc
+ // FMF flag.
+ llvm::FastMathFlags FMF;
+ FMF.setAllowReassoc();
+ cast<llvm::CallBase>(Reduce)->setFastMathFlags(FMF);
+ }
+ return RValue::get(Reduce);
+ }
case Builtin::BI__builtin_matrix_transpose: {
auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 45006bfc11644..271515c1241ab 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2818,6 +2818,14 @@ static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) {
return S.UsualUnaryFPConversions(Res.get());
}
+static QualType GetVectorElementType(ASTContext &Context, QualType VecTy) {
+ if (const auto *TyA = VecTy->getAs<VectorType>())
+ return TyA->getElementType();
+ if (VecTy->isSizelessVectorType())
+ return VecTy->getSizelessVectorEltType(Context);
+ return QualType();
+}
+
ExprResult
Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
CallExpr *TheCall) {
@@ -3668,14 +3676,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
return ExprError();
const Expr *Arg = TheCall->getArg(0);
- const auto *TyA = Arg->getType()->getAs<VectorType>();
-
- QualType ElTy;
- if (TyA)
- ElTy = TyA->getElementType();
- else if (Arg->getType()->isSizelessVectorType())
- ElTy = Arg->getType()->getSizelessVectorEltType(Context);
+ QualType ElTy = GetVectorElementType(Context, Arg->getType());
if (ElTy.isNull() || !ElTy->isIntegerType()) {
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector of */ 4 << /* int */ 1 << /* no fp */ 0
@@ -3687,6 +3689,43 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
+ case Builtin::BI__builtin_reduce_addf: {
+ if (checkArgCountRange(TheCall, 1, 2))
+ return ExprError();
+
+ ExprResult Vec = UsualUnaryConversions(TheCall->getArg(0));
+ if (Vec.isInvalid())
+ return ExprError();
+
+ TheCall->setArg(0, Vec.get());
+
+ QualType ElTy = GetVectorElementType(Context, Vec.get()->getType());
+ if (ElTy.isNull() || !ElTy->isRealFloatingType()) {
+ Diag(Vec.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << 1 << /* vector of */ 4 << /* no int */ 0 << /* fp */ 1
+ << Vec.get()->getType();
+ return ExprError();
+ }
+
+ if (TheCall->getNumArgs() == 2) {
+ ExprResult StartValue = UsualUnaryConversions(TheCall->getArg(1));
+ if (StartValue.isInvalid())
+ return ExprError();
+
+ if (!StartValue.get()->getType()->isRealFloatingType()) {
+ Diag(StartValue.get()->getBeginLoc(),
+ diag::err_builtin_invalid_arg_type)
+ << 2 << /* scalar */ 1 << /* no int */ 0 << /* fp */ 1
+ << StartValue.get()->getType();
+ return ExprError();
+ }
+ TheCall->setArg(1, StartValue.get());
+ }
+
+ TheCall->setType(ElTy);
+ break;
+ }
+
case Builtin::BI__builtin_matrix_transpose:
return BuiltinMatrixTranspose(TheCall, TheCallResult);
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index e12fd729c84c0..bde6e9a4f9868 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -4,6 +4,8 @@
// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s
typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half8 __attribute__((ext_vector_type(8)));
+
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));
@@ -162,6 +164,27 @@ void test_builtin_reduce_minimum(float4 vf1) {
const double r4 = __builtin_reduce_minimum(vf1_as_one);
}
+void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
+ // CHECK-LABEL: define void @test_builtin_reduce_addf(
+
+ // CHECK: [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+ // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V0]])
+ float r1 = __builtin_reduce_addf(vf1);
+
+ // CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+ // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[V1]])
+ float r2 = __builtin_reduce_addf(vf1, 0.0f);
+
+ // CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
+ // CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]])
+ _Float16 r3 = __builtin_reduce_addf(vf2);
+
+ // CHECK: [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
+ // CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V3]])
+ // CHECK-NEXT: fpext half [[RDX]] to float
+ float r4 = __builtin_reduce_addf(vf2, -0.0f);
+}
+
#if defined(__ARM_FEATURE_SVE)
#include <arm_sve.h>
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index 74f09d501198b..d4562d967e0e9 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -148,3 +148,20 @@ void test_builtin_reduce_minimum(int i, float4 v, int3 iv) {
i = __builtin_reduce_minimum(i);
// expected-error at -1 {{1st argument must be a vector of floating-point types (was 'int')}}
}
+
+void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
+ struct Foo s = __builtin_reduce_addf(v);
+ // expected-error at -1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+ f = __builtin_reduce_addf();
+ // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+
+ f = __builtin_reduce_addf(v, f, v);
+ // expected-error at -1 {{too many arguments to function call, expected at most 2, have 3}}
+
+ f = __builtin_reduce_addf(iv);
+ // expected-error at -1 {{1st argument must be a vector of floating-point types (was 'int3' (vector of 3 'int' values))}}
+
+ f = __builtin_reduce_addf(v, (int)121);
+ // expected-error at -1 {{2nd argument must be a scalar floating-point type (was 'int')}}
+}
>From 2b99bdeb2b5e3ccdacfed2098df10f625e0a9fa3 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 15 Jan 2026 13:37:34 +0000
Subject: [PATCH 2/8] Try to fix docs
---
clang/docs/LanguageExtensions.rst | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 2109679d20dcd..d2541be502862 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -948,8 +948,8 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
for the comparison.
ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types
ET __builtin_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types
- with `(ET)s`, then adding each lane of the `a` in-order, starting from
- lane 0.
+ with `(ET)s`, then adding each lane of the `a` in-order, starting
+ from lane 0.
======================================= ====================================================================== ==================================
*Masked Builtins*
>From 4b0f3389159acca697538325fe017fccaed2cdef Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 26 Jan 2026 13:58:12 +0000
Subject: [PATCH 3/8] Fixups
---
clang/docs/LanguageExtensions.rst | 50 ++++++++++----------
clang/include/clang/Basic/Builtins.td | 6 +++
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 1 +
clang/lib/CodeGen/CGBuiltin.cpp | 9 ++--
clang/lib/Sema/SemaChecking.cpp | 9 ++--
clang/test/CodeGen/builtins-reduction-math.c | 10 ++--
clang/test/Sema/builtins-reduction-math.c | 8 ++--
7 files changed, 51 insertions(+), 42 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index d2541be502862..7322f69753809 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -926,31 +926,31 @@ Example:
Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
-======================================= ====================================================================== ==================================
- Name Operation Supported element types
-======================================= ====================================================================== ==================================
- ET __builtin_reduce_max(VT a) return the largest element of the vector. The floating point result integer and floating point types
- will always be a number unless all elements of the vector are NaN.
- ET __builtin_reduce_min(VT a) return the smallest element of the vector. The floating point result integer and floating point types
- will always be a number unless all elements of the vector are NaN.
- ET __builtin_reduce_add(VT a) \+ integer types
- ET __builtin_reduce_mul(VT a) \* integer types
- ET __builtin_reduce_and(VT a) & integer types
- ET __builtin_reduce_or(VT a) \| integer types
- ET __builtin_reduce_xor(VT a) ^ integer types
- ET __builtin_reduce_maximum(VT a) return the largest element of the vector. Follows IEEE 754-2019 floating point types
- semantics, see `LangRef
- <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
- for the comparison.
- ET __builtin_reduce_minimum(VT a) return the smallest element of the vector. Follows IEEE 754-2019 floating point types
- semantics, see `LangRef
- <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
- for the comparison.
- ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types
- ET __builtin_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types
- with `(ET)s`, then adding each lane of the `a` in-order, starting
- from lane 0.
-======================================= ====================================================================== ==================================
+============================================= ====================================================================== ==================================
+ Name Operation Supported element types
+============================================= ====================================================================== ==================================
+ ET __builtin_reduce_max(VT a) return the largest element of the vector. The floating point result integer and floating point types
+ will always be a number unless all elements of the vector are NaN.
+ ET __builtin_reduce_min(VT a) return the smallest element of the vector. The floating point result integer and floating point types
+ will always be a number unless all elements of the vector are NaN.
+ ET __builtin_reduce_add(VT a) \+ integer types
+ ET __builtin_reduce_mul(VT a) \* integer types
+ ET __builtin_reduce_and(VT a) & integer types
+ ET __builtin_reduce_or(VT a) \| integer types
+ ET __builtin_reduce_xor(VT a) ^ integer types
+ ET __builtin_reduce_maximum(VT a) return the largest element of the vector. Follows IEEE 754-2019 floating point types
+ semantics, see `LangRef
+ <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
+ for the comparison.
+ ET __builtin_reduce_minimum(VT a) return the smallest element of the vector. Follows IEEE 754-2019 floating point types
+ semantics, see `LangRef
+ <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
+ for the comparison.
+ ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types
+ ET __builtin_ordered_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types
+ with `(ET)s`, then adding each lane of the `a` in-order, starting
+ from lane 0.
+============================================= ====================================================================== ==================================
*Masked Builtins*
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 182ec64533f26..59b76e349bb71 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1670,6 +1670,12 @@ def ReduceAddf : Builtin {
let Prototype = "void(...)";
}
+def OrderedReduceAddf : Builtin {
+ let Spellings = ["__builtin_ordered_reduce_addf"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ReduceMul : Builtin {
let Spellings = ["__builtin_reduce_mul"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 87c57ab0cfbdb..70f9ba5edb783 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1518,6 +1518,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and:
case Builtin::BI__builtin_reduce_addf:
+ case Builtin::BI__builtin_ordered_reduce_addf:
case Builtin::BI__builtin_reduce_maximum:
case Builtin::BI__builtin_reduce_minimum:
case Builtin::BI__builtin_matrix_transpose:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 32fe47caaa97e..711c9754de76f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4215,7 +4215,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_minimum:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
- case Builtin::BI__builtin_reduce_addf: {
+ case Builtin::BI__builtin_reduce_addf:
+ case Builtin::BI__builtin_ordered_reduce_addf: {
llvm::Value *Vector = EmitScalarExpr(E->getArg(0));
llvm::Type *ScalarTy = Vector->getType()->getScalarType();
llvm::Value *StartValue = nullptr;
@@ -4228,9 +4229,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
- if (!StartValue) {
- // No start value means an unordered reduction, which requires the reassoc
- // FMF flag.
+ if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_addf) {
+ // `__builtin_reduce_addf` an unordered reduction, which requires the
+ // reassoc FMF flag.
llvm::FastMathFlags FMF;
FMF.setAllowReassoc();
cast<llvm::CallBase>(Reduce)->setFastMathFlags(FMF);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 271515c1241ab..84bccd20c7765 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2818,7 +2818,7 @@ static ExprResult BuiltinVectorMathConversions(Sema &S, Expr *E) {
return S.UsualUnaryFPConversions(Res.get());
}
-static QualType GetVectorElementType(ASTContext &Context, QualType VecTy) {
+static QualType getVectorElementType(ASTContext &Context, QualType VecTy) {
if (const auto *TyA = VecTy->getAs<VectorType>())
return TyA->getElementType();
if (VecTy->isSizelessVectorType())
@@ -3677,7 +3677,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
const Expr *Arg = TheCall->getArg(0);
- QualType ElTy = GetVectorElementType(Context, Arg->getType());
+ QualType ElTy = getVectorElementType(Context, Arg->getType());
if (ElTy.isNull() || !ElTy->isIntegerType()) {
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector of */ 4 << /* int */ 1 << /* no fp */ 0
@@ -3689,7 +3689,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
- case Builtin::BI__builtin_reduce_addf: {
+ case Builtin::BI__builtin_reduce_addf:
+ case Builtin::BI__builtin_ordered_reduce_addf: {
if (checkArgCountRange(TheCall, 1, 2))
return ExprError();
@@ -3699,7 +3700,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
TheCall->setArg(0, Vec.get());
- QualType ElTy = GetVectorElementType(Context, Vec.get()->getType());
+ QualType ElTy = getVectorElementType(Context, Vec.get()->getType());
if (ElTy.isNull() || !ElTy->isRealFloatingType()) {
Diag(Vec.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type)
<< 1 << /* vector of */ 4 << /* no int */ 0 << /* fp */ 1
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index bde6e9a4f9868..2c69315419882 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -168,12 +168,12 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
// CHECK-LABEL: define void @test_builtin_reduce_addf(
// CHECK: [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
- // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V0]])
- float r1 = __builtin_reduce_addf(vf1);
+ // CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 1.000000e+00, <4 x float> [[V0]])
+ float r1 = __builtin_reduce_addf(vf1, 1.0f);
// CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
- // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[V1]])
- float r2 = __builtin_reduce_addf(vf1, 0.0f);
+ // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V1]])
+ float r2 = __builtin_ordered_reduce_addf(vf1);
// CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]])
@@ -182,7 +182,7 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
// CHECK: [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V3]])
// CHECK-NEXT: fpext half [[RDX]] to float
- float r4 = __builtin_reduce_addf(vf2, -0.0f);
+ float r4 = __builtin_ordered_reduce_addf(vf2, -0.0f);
}
#if defined(__ARM_FEATURE_SVE)
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index d4562d967e0e9..3ca5b5755a53e 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -153,15 +153,15 @@ void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
struct Foo s = __builtin_reduce_addf(v);
// expected-error at -1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+ f = __builtin_ordered_reduce_addf(v, f, f);
+ // expected-error at -1 {{too many arguments to function call, expected at most 2, have 3}}
+
f = __builtin_reduce_addf();
// expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
- f = __builtin_reduce_addf(v, f, v);
- // expected-error at -1 {{too many arguments to function call, expected at most 2, have 3}}
-
f = __builtin_reduce_addf(iv);
// expected-error at -1 {{1st argument must be a vector of floating-point types (was 'int3' (vector of 3 'int' values))}}
- f = __builtin_reduce_addf(v, (int)121);
+ f = __builtin_ordered_reduce_addf(v, (int)121);
// expected-error at -1 {{2nd argument must be a scalar floating-point type (was 'int')}}
}
>From 376e17647da4bb0fd82e422a6b153a94232e1722 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 26 Jan 2026 17:55:15 +0000
Subject: [PATCH 4/8] Rename
---
clang/docs/LanguageExtensions.rst | 12 ++++++------
clang/include/clang/Basic/Builtins.td | 4 ++--
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 4 ++--
clang/lib/CodeGen/CGBuiltin.cpp | 8 ++++----
clang/lib/Sema/SemaChecking.cpp | 4 ++--
clang/test/CodeGen/builtins-reduction-math.c | 8 ++++----
clang/test/Sema/builtins-reduction-math.c | 10 +++++-----
7 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 7322f69753809..5a25dcf1f35a0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -946,8 +946,8 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
for the comparison.
- ET __builtin_reduce_addf(VT a) unordered floating-point add reduction. floating point types
- ET __builtin_ordered_reduce_addf(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types
+ ET __builtin_reduce_fadd(VT a) unordered floating-point add reduction. floating point types
+ ET __builtin_ordered_reduce_fadd(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types
with `(ET)s`, then adding each lane of the `a` in-order, starting
from lane 0.
============================================= ====================================================================== ==================================
@@ -979,15 +979,15 @@ Example:
using v8i = int [[clang::ext_vector_type(8)]];
v8i load(v8b mask, int *ptr) { return __builtin_masked_load(mask, ptr); }
-
+
v8i load_expand(v8b mask, int *ptr) {
return __builtin_masked_expand_load(mask, ptr);
}
-
+
void store(v8b mask, v8i val, int *ptr) {
__builtin_masked_store(mask, val, ptr);
}
-
+
void store_compress(v8b mask, v8i val, int *ptr) {
__builtin_masked_compress_store(mask, val, ptr);
}
@@ -1079,7 +1079,7 @@ The matrix type extension supports explicit casts. Implicit type conversion betw
The matrix type extension supports column and row major memory layouts, but not
all builtins are supported with row-major layout. The layout defaults to column
-major and can be specified using `-fmatrix-memory-layout`. To enable column
+major and can be specified using `-fmatrix-memory-layout`. To enable column
major layout, use `-fmatrix-memory-layout=column-major`, and for row major
layout use `-fmatrix-memory-layout=row-major`
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 59b76e349bb71..c328d011c05c0 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1665,13 +1665,13 @@ def ReduceAdd : Builtin {
}
def ReduceAddf : Builtin {
- let Spellings = ["__builtin_reduce_addf"];
+ let Spellings = ["__builtin_reduce_fadd"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}
def OrderedReduceAddf : Builtin {
- let Spellings = ["__builtin_ordered_reduce_addf"];
+ let Spellings = ["__builtin_ordered_reduce_fadd"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 70f9ba5edb783..ffc9df52c7a8e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1517,8 +1517,8 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__builtin_reduce_xor:
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and:
- case Builtin::BI__builtin_reduce_addf:
- case Builtin::BI__builtin_ordered_reduce_addf:
+ case Builtin::BI__builtin_reduce_fadd:
+ case Builtin::BI__builtin_ordered_reduce_fadd:
case Builtin::BI__builtin_reduce_maximum:
case Builtin::BI__builtin_reduce_minimum:
case Builtin::BI__builtin_matrix_transpose:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 711c9754de76f..dfd2d5e899d09 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4215,8 +4215,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_minimum:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
- case Builtin::BI__builtin_reduce_addf:
- case Builtin::BI__builtin_ordered_reduce_addf: {
+ case Builtin::BI__builtin_reduce_fadd:
+ case Builtin::BI__builtin_ordered_reduce_fadd: {
llvm::Value *Vector = EmitScalarExpr(E->getArg(0));
llvm::Type *ScalarTy = Vector->getType()->getScalarType();
llvm::Value *StartValue = nullptr;
@@ -4229,8 +4229,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
- if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_addf) {
- // `__builtin_reduce_addf` an unordered reduction, which requires the
+ if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_fadd) {
+ // `__builtin_reduce_fadd` an unordered reduction, which requires the
// reassoc FMF flag.
llvm::FastMathFlags FMF;
FMF.setAllowReassoc();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 84bccd20c7765..fc641f0515d5f 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3689,8 +3689,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
- case Builtin::BI__builtin_reduce_addf:
- case Builtin::BI__builtin_ordered_reduce_addf: {
+ case Builtin::BI__builtin_reduce_fadd:
+ case Builtin::BI__builtin_ordered_reduce_fadd: {
if (checkArgCountRange(TheCall, 1, 2))
return ExprError();
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index 2c69315419882..99f1596e3ff54 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -169,20 +169,20 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
// CHECK: [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 1.000000e+00, <4 x float> [[V0]])
- float r1 = __builtin_reduce_addf(vf1, 1.0f);
+ float r1 = __builtin_reduce_fadd(vf1, 1.0f);
// CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V1]])
- float r2 = __builtin_ordered_reduce_addf(vf1);
+ float r2 = __builtin_ordered_reduce_fadd(vf1);
// CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]])
- _Float16 r3 = __builtin_reduce_addf(vf2);
+ _Float16 r3 = __builtin_reduce_fadd(vf2);
// CHECK: [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V3]])
// CHECK-NEXT: fpext half [[RDX]] to float
- float r4 = __builtin_ordered_reduce_addf(vf2, -0.0f);
+ float r4 = __builtin_ordered_reduce_fadd(vf2, -0.0f);
}
#if defined(__ARM_FEATURE_SVE)
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index 3ca5b5755a53e..738b54dc929dd 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -150,18 +150,18 @@ void test_builtin_reduce_minimum(int i, float4 v, int3 iv) {
}
void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
- struct Foo s = __builtin_reduce_addf(v);
+ struct Foo s = __builtin_reduce_fadd(v);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
- f = __builtin_ordered_reduce_addf(v, f, f);
+ f = __builtin_ordered_reduce_fadd(v, f, f);
// expected-error@-1 {{too many arguments to function call, expected at most 2, have 3}}
- f = __builtin_reduce_addf();
+ f = __builtin_reduce_fadd();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
- f = __builtin_reduce_addf(iv);
+ f = __builtin_reduce_fadd(iv);
// expected-error@-1 {{1st argument must be a vector of floating-point types (was 'int3' (vector of 3 'int' values))}}
- f = __builtin_ordered_reduce_addf(v, (int)121);
+ f = __builtin_ordered_reduce_fadd(v, (int)121);
// expected-error@-1 {{2nd argument must be a scalar floating-point type (was 'int')}}
}
>From b5c55c2d05669e4a75a32b0a8946c530b92bcdaf Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 27 Jan 2026 11:36:18 +0000
Subject: [PATCH 5/8] Fixups
---
clang/lib/CodeGen/CGBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index dfd2d5e899d09..c1a50bbe1c963 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4230,7 +4230,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_fadd) {
- // `__builtin_reduce_fadd` an unordered reduction, which requires the
+ // `__builtin_reduce_fadd` is an unordered reduction which requires the
// reassoc FMF flag.
llvm::FastMathFlags FMF;
FMF.setAllowReassoc();
>From 94c546ba73d9645c2483c5a7e0a3371ebe3551a7 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 19 Feb 2026 12:58:23 +0000
Subject: [PATCH 6/8] Tweak names
---
clang/docs/LanguageExtensions.rst | 50 ++++++++++----------
clang/include/clang/Basic/Builtins.td | 8 ++--
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 4 +-
clang/lib/CodeGen/CGBuiltin.cpp | 10 ++--
clang/lib/Sema/SemaChecking.cpp | 4 +-
clang/test/CodeGen/builtins-reduction-math.c | 8 ++--
clang/test/Sema/builtins-reduction-math.c | 10 ++--
7 files changed, 47 insertions(+), 47 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 5a25dcf1f35a0..29eda1121dbcb 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -926,31 +926,31 @@ Example:
Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
-============================================= ====================================================================== ==================================
- Name Operation Supported element types
-============================================= ====================================================================== ==================================
- ET __builtin_reduce_max(VT a) return the largest element of the vector. The floating point result integer and floating point types
- will always be a number unless all elements of the vector are NaN.
- ET __builtin_reduce_min(VT a) return the smallest element of the vector. The floating point result integer and floating point types
- will always be a number unless all elements of the vector are NaN.
- ET __builtin_reduce_add(VT a) \+ integer types
- ET __builtin_reduce_mul(VT a) \* integer types
- ET __builtin_reduce_and(VT a) & integer types
- ET __builtin_reduce_or(VT a) \| integer types
- ET __builtin_reduce_xor(VT a) ^ integer types
- ET __builtin_reduce_maximum(VT a) return the largest element of the vector. Follows IEEE 754-2019 floating point types
- semantics, see `LangRef
- <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
- for the comparison.
- ET __builtin_reduce_minimum(VT a) return the smallest element of the vector. Follows IEEE 754-2019 floating point types
- semantics, see `LangRef
- <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
- for the comparison.
- ET __builtin_reduce_fadd(VT a) unordered floating-point add reduction. floating point types
- ET __builtin_ordered_reduce_fadd(VT a, ET s) ordered floating-point add reduction, initializing the accumulator floating point types
- with `(ET)s`, then adding each lane of the `a` in-order, starting
- from lane 0.
-============================================= ====================================================================== ==================================
+============================================== ====================================================================== ==================================
+ Name Operation Supported element types
+============================================== ====================================================================== ==================================
+ ET __builtin_reduce_max(VT a) return the largest element of the vector. The floating point result integer and floating point types
+ will always be a number unless all elements of the vector are NaN.
+ ET __builtin_reduce_min(VT a) return the smallest element of the vector. The floating point result integer and floating point types
+ will always be a number unless all elements of the vector are NaN.
+ ET __builtin_reduce_add(VT a) \+ integer types
+ ET __builtin_reduce_mul(VT a) \* integer types
+ ET __builtin_reduce_and(VT a) & integer types
+ ET __builtin_reduce_or(VT a) \| integer types
+ ET __builtin_reduce_xor(VT a) ^ integer types
+ ET __builtin_reduce_maximum(VT a) return the largest element of the vector. Follows IEEE 754-2019 floating point types
+ semantics, see `LangRef
+ <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
+ for the comparison.
+ ET __builtin_reduce_minimum(VT a) return the smallest element of the vector. Follows IEEE 754-2019 floating point types
+ semantics, see `LangRef
+ <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
+ for the comparison.
+ ET __builtin_reduce_any_order_fadd(VT a) floating-point associative fadd reduction. floating point types
+ ET __builtin_reduce_in_order_fadd(VT a, ET s) in order floating-point fadd reduction, initializing the accumulator floating point types
+ with `(ET)s`, then adding each lane of the `a` in-order, starting
+ from lane 0.
+============================================== ====================================================================== ==================================
*Masked Builtins*
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index c328d011c05c0..44c3d9d808f2b 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1664,14 +1664,14 @@ def ReduceAdd : Builtin {
let Prototype = "void(...)";
}
-def ReduceAddf : Builtin {
- let Spellings = ["__builtin_reduce_fadd"];
+def ReduceInOrderFAdd : Builtin {
+ let Spellings = ["__builtin_reduce_in_order_fadd"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}
-def OrderedReduceAddf : Builtin {
- let Spellings = ["__builtin_ordered_reduce_fadd"];
+def ReduceAnyOrderFAdd : Builtin {
+ let Spellings = ["__builtin_reduce_any_order_fadd"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index ffc9df52c7a8e..903b52eb58a56 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1517,8 +1517,8 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__builtin_reduce_xor:
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and:
- case Builtin::BI__builtin_reduce_fadd:
- case Builtin::BI__builtin_ordered_reduce_fadd:
+ case Builtin::BI__builtin_reduce_any_order_fadd:
+ case Builtin::BI__builtin_reduce_in_order_fadd:
case Builtin::BI__builtin_reduce_maximum:
case Builtin::BI__builtin_reduce_minimum:
case Builtin::BI__builtin_matrix_transpose:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c1a50bbe1c963..d79b9e8e0dd90 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4215,8 +4215,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_minimum:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
- case Builtin::BI__builtin_reduce_fadd:
- case Builtin::BI__builtin_ordered_reduce_fadd: {
+ case Builtin::BI__builtin_reduce_any_order_fadd:
+ case Builtin::BI__builtin_reduce_in_order_fadd: {
llvm::Value *Vector = EmitScalarExpr(E->getArg(0));
llvm::Type *ScalarTy = Vector->getType()->getScalarType();
llvm::Value *StartValue = nullptr;
@@ -4229,9 +4229,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
- if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_fadd) {
- // `__builtin_reduce_fadd` is an unordered reduction which requires the
- // reassoc FMF flag.
+ if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_any_order_fadd) {
+ // `__builtin_reduce_any_order_fadd` is an associative reduction which
+ // requires the reassoc FMF flag.
llvm::FastMathFlags FMF;
FMF.setAllowReassoc();
cast<llvm::CallBase>(Reduce)->setFastMathFlags(FMF);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index fc641f0515d5f..ada9356b78bf9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3689,8 +3689,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
- case Builtin::BI__builtin_reduce_fadd:
- case Builtin::BI__builtin_ordered_reduce_fadd: {
+ case Builtin::BI__builtin_reduce_any_order_fadd:
+ case Builtin::BI__builtin_reduce_in_order_fadd: {
if (checkArgCountRange(TheCall, 1, 2))
return ExprError();
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index 99f1596e3ff54..99a2ba110f798 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -169,20 +169,20 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
// CHECK: [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 1.000000e+00, <4 x float> [[V0]])
- float r1 = __builtin_reduce_fadd(vf1, 1.0f);
+ float r1 = __builtin_reduce_any_order_fadd(vf1, 1.0f);
// CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V1]])
- float r2 = __builtin_ordered_reduce_fadd(vf1);
+ float r2 = __builtin_reduce_in_order_fadd(vf1);
// CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]])
- _Float16 r3 = __builtin_reduce_fadd(vf2);
+ _Float16 r3 = __builtin_reduce_any_order_fadd(vf2);
// CHECK: [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V3]])
// CHECK-NEXT: fpext half [[RDX]] to float
- float r4 = __builtin_ordered_reduce_fadd(vf2, -0.0f);
+ float r4 = __builtin_reduce_in_order_fadd(vf2, -0.0f);
}
#if defined(__ARM_FEATURE_SVE)
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index 738b54dc929dd..dc1875096f6db 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -150,18 +150,18 @@ void test_builtin_reduce_minimum(int i, float4 v, int3 iv) {
}
void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
- struct Foo s = __builtin_reduce_fadd(v);
+ struct Foo s = __builtin_reduce_any_order_fadd(v);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
- f = __builtin_ordered_reduce_fadd(v, f, f);
+ f = __builtin_reduce_in_order_fadd(v, f, f);
// expected-error@-1 {{too many arguments to function call, expected at most 2, have 3}}
- f = __builtin_reduce_fadd();
+ f = __builtin_reduce_any_order_fadd();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
- f = __builtin_reduce_fadd(iv);
+ f = __builtin_reduce_any_order_fadd(iv);
// expected-error@-1 {{1st argument must be a vector of floating-point types (was 'int3' (vector of 3 'int' values))}}
- f = __builtin_ordered_reduce_fadd(v, (int)121);
+ f = __builtin_reduce_in_order_fadd(v, (int)121);
// expected-error@-1 {{2nd argument must be a scalar floating-point type (was 'int')}}
}
>From b1a9cf1ce71a29158bd13005a016b3d0c23f3325 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 20 Feb 2026 15:12:09 +0000
Subject: [PATCH 7/8] Fixups
---
clang/lib/Sema/SemaChecking.cpp | 4 +++-
clang/test/CodeGen/builtins-reduction-math.c | 4 ++--
clang/test/Sema/builtins-reduction-math.c | 5 ++++-
3 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ada9356b78bf9..00a8dad741361 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3691,7 +3691,9 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_any_order_fadd:
case Builtin::BI__builtin_reduce_in_order_fadd: {
- if (checkArgCountRange(TheCall, 1, 2))
+ // For in-order reductions require the user to specify the start value.
+ bool InOrder = BuiltinID == Builtin::BI__builtin_reduce_in_order_fadd;
+ if (InOrder ? checkArgCount(TheCall, 2) : checkArgCountRange(TheCall, 1, 2))
return ExprError();
ExprResult Vec = UsualUnaryConversions(TheCall->getArg(0));
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index 99a2ba110f798..764160e0fd48d 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -172,8 +172,8 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
float r1 = __builtin_reduce_any_order_fadd(vf1, 1.0f);
// CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
- // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[V1]])
- float r2 = __builtin_reduce_in_order_fadd(vf1);
+ // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 2.000000e+00, <4 x float> [[V1]])
+ float r2 = __builtin_reduce_in_order_fadd(vf1, 2.0f);
// CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]])
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index dc1875096f6db..0d1cc8cd52b7b 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -153,8 +153,11 @@ void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
struct Foo s = __builtin_reduce_any_order_fadd(v);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+ f = __builtin_reduce_in_order_fadd(v);
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
f = __builtin_reduce_in_order_fadd(v, f, f);
- // expected-error@-1 {{too many arguments to function call, expected at most 2, have 3}}
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
f = __builtin_reduce_any_order_fadd();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
>From 45f56c91bd7936910f41b7f4f10aeaceebe7b999 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 20 Feb 2026 15:24:07 +0000
Subject: [PATCH 8/8] Tweak names
---
clang/docs/LanguageExtensions.rst | 2 +-
clang/include/clang/Basic/Builtins.td | 2 +-
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 2 +-
clang/lib/CodeGen/CGBuiltin.cpp | 6 +++---
clang/lib/Sema/SemaChecking.cpp | 2 +-
clang/test/CodeGen/builtins-reduction-math.c | 4 ++--
clang/test/Sema/builtins-reduction-math.c | 6 +++---
7 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 29eda1121dbcb..f1403faaa2860 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -946,7 +946,7 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#i-fminmax-family>`_
for the comparison.
- ET __builtin_reduce_any_order_fadd(VT a) floating-point associative fadd reduction. floating point types
+ ET __builtin_reduce_assoc_fadd(VT a) floating-point associative fadd reduction. floating point types
ET __builtin_reduce_in_order_fadd(VT a, ET s) in order floating-point fadd reduction, initializing the accumulator floating point types
with `(ET)s`, then adding each lane of the `a` in-order, starting
from lane 0.
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 44c3d9d808f2b..0a2183e7fb1ea 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1671,7 +1671,7 @@ def ReduceInOrderFAdd : Builtin {
}
def ReduceAnyOrderFAdd : Builtin {
- let Spellings = ["__builtin_reduce_any_order_fadd"];
+ let Spellings = ["__builtin_reduce_assoc_fadd"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 903b52eb58a56..9879b18d2b763 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1517,7 +1517,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__builtin_reduce_xor:
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and:
- case Builtin::BI__builtin_reduce_any_order_fadd:
+ case Builtin::BI__builtin_reduce_assoc_fadd:
case Builtin::BI__builtin_reduce_in_order_fadd:
case Builtin::BI__builtin_reduce_maximum:
case Builtin::BI__builtin_reduce_minimum:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d79b9e8e0dd90..38010cad75244 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4215,7 +4215,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_minimum:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
- case Builtin::BI__builtin_reduce_any_order_fadd:
+ case Builtin::BI__builtin_reduce_assoc_fadd:
case Builtin::BI__builtin_reduce_in_order_fadd: {
llvm::Value *Vector = EmitScalarExpr(E->getArg(0));
llvm::Type *ScalarTy = Vector->getType()->getScalarType();
@@ -4229,8 +4229,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Vector->getType());
llvm::CallBase *Reduce = Builder.CreateCall(F, Args, "rdx.addf");
- if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_any_order_fadd) {
- // `__builtin_reduce_any_order_fadd` is an associative reduction which
+ if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_reduce_assoc_fadd) {
+ // `__builtin_reduce_assoc_fadd` is an associative reduction which
// requires the reassoc FMF flag.
llvm::FastMathFlags FMF;
FMF.setAllowReassoc();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 00a8dad741361..2e76fe37739f1 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3689,7 +3689,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
- case Builtin::BI__builtin_reduce_any_order_fadd:
+ case Builtin::BI__builtin_reduce_assoc_fadd:
case Builtin::BI__builtin_reduce_in_order_fadd: {
// For in-order reductions require the user to specify the start value.
bool InOrder = BuiltinID == Builtin::BI__builtin_reduce_in_order_fadd;
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index 764160e0fd48d..aacea50b97bb5 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -169,7 +169,7 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
// CHECK: [[V0:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call reassoc float @llvm.vector.reduce.fadd.v4f32(float 1.000000e+00, <4 x float> [[V0]])
- float r1 = __builtin_reduce_any_order_fadd(vf1, 1.0f);
+ float r1 = __builtin_reduce_assoc_fadd(vf1, 1.0f);
// CHECK: [[V1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
// CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 2.000000e+00, <4 x float> [[V1]])
@@ -177,7 +177,7 @@ void test_builtin_reduce_addf(float4 vf1, half8 vf2) {
// CHECK: [[V2:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V2:%.+]])
- _Float16 r3 = __builtin_reduce_any_order_fadd(vf2);
+ _Float16 r3 = __builtin_reduce_assoc_fadd(vf2);
// CHECK: [[V3:%.+]] = load <8 x half>, ptr %vf2.addr, align 16
// CHECK-NEXT: [[RDX:%.+]] = call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[V3]])
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index 0d1cc8cd52b7b..5270de644356e 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -150,7 +150,7 @@ void test_builtin_reduce_minimum(int i, float4 v, int3 iv) {
}
void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
- struct Foo s = __builtin_reduce_any_order_fadd(v);
+ struct Foo s = __builtin_reduce_assoc_fadd(v);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
f = __builtin_reduce_in_order_fadd(v);
@@ -159,10 +159,10 @@ void test_builtin_reduce_addf(float f, float4 v, int3 iv) {
f = __builtin_reduce_in_order_fadd(v, f, f);
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
- f = __builtin_reduce_any_order_fadd();
+ f = __builtin_reduce_assoc_fadd();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
- f = __builtin_reduce_any_order_fadd(iv);
+ f = __builtin_reduce_assoc_fadd(iv);
// expected-error@-1 {{1st argument must be a vector of floating-point types (was 'int3' (vector of 3 'int' values))}}
f = __builtin_reduce_in_order_fadd(v, (int)121);
More information about the cfe-commits
mailing list