[clang] [Clang] Add float type support to __builtin_reduce_add and __builtin_reduce_mul (PR #120367)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 17 22:50:03 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Farzon Lotfi (farzonl)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/120367.diff
8 Files Affected:
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2-1)
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+12)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+27-2)
- (modified) clang/lib/Sema/SemaChecking.cpp (+23-3)
- (modified) clang/test/AST/ByteCode/builtin-functions.cpp (+15)
- (modified) clang/test/CodeGen/builtins-reduction-math.c (+21)
- (modified) clang/test/Sema/builtins-reduction-math.c (+8-8)
- (modified) clang/test/Sema/constant_builtins_vector.cpp (+12)
``````````diff
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9344b620779b84..0e227c5a3d8179 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12338,7 +12338,8 @@ def err_builtin_invalid_arg_type: Error <
"a vector of integers|"
"an unsigned integer|"
"an 'int'|"
- "a vector of floating points}1 (was %2)">;
+ "a vector of floating points|"
+ "a vector of integers or floating points}1 (was %2)">;
def err_builtin_matrix_disabled: Error<
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d6b33c8aeeaac3..be34b4bb04e3c2 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "../ExprConstShared.h"
#include "Boolean.h"
+#include "ByteCode/Floating.h"
#include "Compiler.h"
#include "EvalEmitter.h"
#include "Interp.h"
@@ -1754,6 +1755,17 @@ static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
PrimType ElemT = *S.getContext().classify(ElemType);
unsigned NumElems = Arg.getNumElems();
+ if (ElemType->isRealFloatingType()) {
+ if (ID != Builtin::BI__builtin_reduce_add &&
+ ID != Builtin::BI__builtin_reduce_mul)
+ llvm_unreachable("Only reduce_add and reduce_mul are supported for "
+ "floating-point types.");
+ // Floating-point arithmetic is not valid for constant expression
+ // initialization. Returning false defers checks to integral constant
+ // expression validation, preventing a bad deref of Floating as an integer.
+ return false;
+ }
+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
T Result = Arg.atIndex(0).deref<T>();
unsigned BitWidth = Result.bitWidth();
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4d4b7428abd505..12e3cb18bdb89d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4274,12 +4274,37 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
*this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
}
- case Builtin::BI__builtin_reduce_add:
+ case Builtin::BI__builtin_reduce_add: {
+ // Note: vector_reduce_fadd takes two arguments: a scalar start value
+ // and a vector. Calling it through the shared helper would require
+ // emitBuiltinWithOneOverloadedType<2>. To keep the builtin's Sema
+ // behavior the same regardless of element type, we populate the
+ // scalar start value of vector_reduce_fadd with 0.
+ if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
+ Value *X = EmitScalarExpr(E->getArg(0));
+ auto EltTy = X->getType()->getScalarType();
+ Value *Seed = ConstantFP::get(EltTy, 0);
+ return RValue::get(Builder.CreateIntrinsic(
+ /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_fadd,
+ ArrayRef<Value *>{Seed, X}, nullptr, "rdx.fadd"));
+ }
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation());
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
- case Builtin::BI__builtin_reduce_mul:
+ }
+ case Builtin::BI__builtin_reduce_mul: {
+ if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
+ Value *X = EmitScalarExpr(E->getArg(0));
+ auto EltTy = X->getType()->getScalarType();
+ Value *Seed = ConstantFP::get(EltTy, 0);
+ return RValue::get(Builder.CreateIntrinsic(
+ /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_fmul,
+ ArrayRef<Value *>{Seed, X}, nullptr, "rdx.fmul"));
+ }
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation());
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
+ }
case Builtin::BI__builtin_reduce_xor:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a248a6b53b0d06..a13c25eb2b6f6b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2883,11 +2883,31 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
TheCall->setType(ElTy);
break;
}
+ case Builtin::BI__builtin_reduce_add:
+ case Builtin::BI__builtin_reduce_mul: {
+ if (PrepareBuiltinReduceMathOneArgCall(TheCall))
+ return ExprError();
+
+ const Expr *Arg = TheCall->getArg(0);
+ const auto *TyA = Arg->getType()->getAs<VectorType>();
+
+ QualType ElTy;
+ if (TyA)
+ ElTy = TyA->getElementType();
+ else if (Arg->getType()->isSizelessVectorType())
+ ElTy = Arg->getType()->getSizelessVectorEltType(Context);
+
+ if (ElTy.isNull()) {
+ Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << 1 << /* vector of integers or floating points */ 10
+ << Arg->getType();
+ return ExprError();
+ }
+ TheCall->setType(ElTy);
+ break;
+ }
// These builtins support vectors of integers only.
- // TODO: ADD/MUL should support floating-point types.
- case Builtin::BI__builtin_reduce_add:
- case Builtin::BI__builtin_reduce_mul:
case Builtin::BI__builtin_reduce_xor:
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and: {
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 5906cb970f06c4..1a3dbd5e37a709 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -1056,6 +1056,14 @@ namespace RecuceAdd {
static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0);
static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0);
+ static_assert(__builtin_reduce_add((vector4float){}) == 0.0);
+ // both-error@-1 {{static assertion expression is not an integral constant expression}}
+ static_assert(__builtin_reduce_add((vector4float){1.1, 2.2, 3.3, 4.4}) == 11.0);
+ // both-error@-1 {{static assertion expression is not an integral constant expression}}
+ static_assert(__builtin_reduce_add((vector4double){100.1, 200.2, 300.3, 400.4}) == 1001.0);
+ // both-error@-1 {{static assertion expression is not an integral constant expression}}
+
+
#ifdef __SIZEOF_INT128__
typedef __int128 v4i128 __attribute__((__vector_size__(128 * 2)));
@@ -1091,6 +1099,13 @@ namespace ReduceMul {
(~0U - 1));
#endif
static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1);
+
+ static_assert(__builtin_reduce_mul((vector4float){}) == 0.0);
+ // both-error@-1 {{static assertion expression is not an integral constant expression}}
+ static_assert(__builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0}) == 6.0);
+ // both-error@-1 {{static assertion expression is not an integral constant expression}}
+ static_assert(__builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0}) == 12.0);
+ // both-error@-1 {{static assertion expression is not an integral constant expression}}
}
namespace ReduceAnd {
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index e12fd729c84c0b..35f12ca710e3e3 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -4,6 +4,7 @@
// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s
typedef float float4 __attribute__((ext_vector_type(4)));
+typedef double double4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));
@@ -61,6 +62,16 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
unsigned long long r5 = __builtin_reduce_min(cvi1);
}
+void test_builtin_reduce_addf(float4 vf4, double4 vd4) {
+ // CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
+ // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VF4]])
+ float r2 = __builtin_reduce_add(vf4);
+
+ // CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
+ // CHECK-NEXT: call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VD4]])
+ double r3 = __builtin_reduce_add(vd4);
+}
+
void test_builtin_reduce_add(si8 vi1, u4 vu1) {
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
// CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]])
@@ -83,6 +94,16 @@ void test_builtin_reduce_add(si8 vi1, u4 vu1) {
unsigned long long r5 = __builtin_reduce_add(cvu1);
}
+void test_builtin_reduce_mulf(float4 vf4, double4 vd4) {
+ // CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
+ // CHECK-NEXT: call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> [[VF4]])
+ float r2 = __builtin_reduce_mul(vf4);
+
+ // CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
+ // CHECK-NEXT: call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> [[VD4]])
+ double r3 = __builtin_reduce_mul(vd4);
+}
+
void test_builtin_reduce_mul(si8 vi1, u4 vu1) {
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
// CHECK-NEXT: call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[VI1]])
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index 9b0d91bfd6e3d2..9e2dac7ebbe6f6 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -36,7 +36,7 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) {
// expected-error@-1 {{1st argument must be a vector type (was 'int')}}
}
-void test_builtin_reduce_add(int i, float4 v, int3 iv) {
+void test_builtin_reduce_add(int i, float f, int3 iv) {
struct Foo s = __builtin_reduce_add(iv);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
@@ -47,13 +47,13 @@ void test_builtin_reduce_add(int i, float4 v, int3 iv) {
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
i = __builtin_reduce_add(i);
- // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
+ // expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'int')}}
- i = __builtin_reduce_add(v);
- // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+ f = __builtin_reduce_add(f);
+ // expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'float')}}
}
-void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
+void test_builtin_reduce_mul(int i, float f, int3 iv) {
struct Foo s = __builtin_reduce_mul(iv);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
@@ -64,10 +64,10 @@ void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
i = __builtin_reduce_mul(i);
- // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
+ // expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'int')}}
- i = __builtin_reduce_mul(v);
- // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+ f = __builtin_reduce_mul(f);
+ // expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'float')}}
}
void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
diff --git a/clang/test/Sema/constant_builtins_vector.cpp b/clang/test/Sema/constant_builtins_vector.cpp
index b2f56e5a87ab1a..58a05ec2dd406a 100644
--- a/clang/test/Sema/constant_builtins_vector.cpp
+++ b/clang/test/Sema/constant_builtins_vector.cpp
@@ -746,6 +746,12 @@ constexpr long long reduceAddLong2 = __builtin_reduce_add((vector4long){(1LL <<
static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0);
static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0);
+constexpr float reduceAddFloat = __builtin_reduce_add((vector4float){1.0, 2.0, 3.0, 4.0});
+// expected-error@-1 {{must be initialized by a constant expression}}
+
+constexpr double reduceAddDouble = __builtin_reduce_add((vector4double){-1.0, 2.0, -3.0, 4.0});
+// expected-error@-1 {{must be initialized by a constant expression}}
+
static_assert(__builtin_reduce_mul((vector4char){}) == 0);
static_assert(__builtin_reduce_mul((vector4char){1, 2, 3, 4}) == 24);
static_assert(__builtin_reduce_mul((vector4short){1, 2, 30, 40}) == 2400);
@@ -766,6 +772,12 @@ constexpr long long reduceMulLong2 = __builtin_reduce_mul((vector4long){(1LL <<
static_assert(__builtin_reduce_mul((vector4uint){~0U, 1, 1, 2}) == ~0U - 1);
static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1);
+constexpr float reduceMulFloat = __builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0});
+// expected-error@-1 {{must be initialized by a constant expression}}
+
+constexpr double reduceMulDouble = __builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0});
+// expected-error@-1 {{must be initialized by a constant expression}}
+
static_assert(__builtin_reduce_and((vector4char){}) == 0);
static_assert(__builtin_reduce_and((vector4char){(char)0x11, (char)0x22, (char)0x44, (char)0x88}) == 0);
static_assert(__builtin_reduce_and((vector4short){(short)0x1111, (short)0x2222, (short)0x4444, (short)0x8888}) == 0);
``````````
</details>
https://github.com/llvm/llvm-project/pull/120367
More information about the cfe-commits
mailing list