[clang] 7999355 - [Clang] Add min/max reduction builtins.
Florian Hahn via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 2 07:02:06 PDT 2021
Author: Florian Hahn
Date: 2021-11-02T15:01:42+01:00
New Revision: 7999355106fb2fcc8de243d2e34b4b73ae4f3d2f
URL: https://github.com/llvm/llvm-project/commit/7999355106fb2fcc8de243d2e34b4b73ae4f3d2f
DIFF: https://github.com/llvm/llvm-project/commit/7999355106fb2fcc8de243d2e34b4b73ae4f3d2f.diff
LOG: [Clang] Add min/max reduction builtins.
This patch implements __builtin_reduce_max and __builtin_reduce_min as
specified in D111529.
The order of operations does not matter for min or max reductions and
they can be directly lowered to the corresponding
llvm.vector.reduce.{fmin,fmax,umin,umax,smin,smax} intrinsic calls.
Reviewed By: aaron.ballman
Differential Revision: https://reviews.llvm.org/D112001
Added:
clang/test/CodeGen/builtins-reduction-math.c
clang/test/Sema/builtins-reduction-math.c
Modified:
clang/include/clang/Basic/Builtins.def
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Sema/Sema.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaChecking.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 7d331a86126f1..b05777889e79a 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -646,6 +646,8 @@ BUILTIN(__builtin_call_with_static_chain, "v.", "nt")
BUILTIN(__builtin_elementwise_abs, "v.", "nct")
BUILTIN(__builtin_elementwise_max, "v.", "nct")
BUILTIN(__builtin_elementwise_min, "v.", "nct")
+BUILTIN(__builtin_reduce_max, "v.", "nct")
+BUILTIN(__builtin_reduce_min, "v.", "nct")
BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d37c8e9266e9b..a67ef684f1e5c 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11313,7 +11313,7 @@ def err_builtin_invalid_arg_type: Error <
"%ordinal0 argument must be a "
"%select{vector, integer or floating point type|matrix|"
"pointer to a valid matrix element type|"
- "signed integer or floating point type}1 (was %2)">;
+ "signed integer or floating point type|vector type}1 (was %2)">;
def err_builtin_matrix_disabled: Error<
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 11e157bc7d731..909328b164d5c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12732,6 +12732,7 @@ class Sema final {
bool SemaBuiltinElementwiseMath(CallExpr *TheCall);
bool SemaBuiltinElementwiseMathOneArg(CallExpr *TheCall);
+ bool SemaBuiltinReduceMath(CallExpr *TheCall);
// Matrix builtin handling.
ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 501b0e3c34436..fab21e5b588a5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3145,6 +3145,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}
+  case Builtin::BI__builtin_reduce_max: {
+    // Select the max-reduction intrinsic for the operand: integer operands use
+    // smax or umax, everything else uses fmax.
+    auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+      if (IrTy->isIntOrIntVectorTy()) {
+        // Signedness is taken from the Clang type; for a vector operand that
+        // means the vector's element type.
+        if (auto *VecTy = QT->getAs<VectorType>())
+          QT = VecTy->getElementType();
+        if (QT->isSignedIntegerType())
+          return llvm::Intrinsic::vector_reduce_smax;
+        else
+          return llvm::Intrinsic::vector_reduce_umax;
+      }
+      return llvm::Intrinsic::vector_reduce_fmax;
+    };
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    // The result is named after the operation emitted here (max), so the IR
+    // stays distinguishable from the __builtin_reduce_min lowering.
+    Value *Result = Builder.CreateUnaryIntrinsic(
+        GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr,
+        "rdx.max");
+    return RValue::get(Result);
+  }
+
+  case Builtin::BI__builtin_reduce_min: {
+    // Maps the operand's Clang type and IR type to the matching min-reduction
+    // intrinsic.
+    auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+      // Anything that is not an integer (or integer vector) reduces with fmin.
+      if (!IrTy->isIntOrIntVectorTy())
+        return llvm::Intrinsic::vector_reduce_fmin;
+      // For a vector operand the element type decides signed vs. unsigned.
+      if (auto *VecTy = QT->getAs<VectorType>())
+        QT = VecTy->getElementType();
+      return QT->isSignedIntegerType() ? llvm::Intrinsic::vector_reduce_smin
+                                       : llvm::Intrinsic::vector_reduce_umin;
+    };
+    const Expr *Arg = E->getArg(0);
+    Value *Vec = EmitScalarExpr(Arg);
+    const auto IID = GetIntrinsicID(Arg->getType(), Vec->getType());
+    return RValue::get(
+        Builder.CreateUnaryIntrinsic(IID, Vec, nullptr, "rdx.min"));
+  }
+
case Builtin::BI__builtin_matrix_transpose: {
const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
Value *MatValue = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bf458f914c111..84cebb03b5f02 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1985,6 +1985,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
if (SemaBuiltinElementwiseMath(TheCall))
return ExprError();
break;
+ case Builtin::BI__builtin_reduce_max:
+ case Builtin::BI__builtin_reduce_min:
+ if (SemaBuiltinReduceMath(TheCall))
+ return ExprError();
+ break;
case Builtin::BI__builtin_matrix_transpose:
return SemaBuiltinMatrixTranspose(TheCall, TheCallResult);
@@ -16596,6 +16601,26 @@ bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) {
return false;
}
+/// Semantic checking for the single-argument reduction builtins
+/// (__builtin_reduce_max / __builtin_reduce_min).
+///
+/// Requires exactly one argument, applies the usual unary conversions to it,
+/// and requires the converted argument to have a vector type. On success the
+/// call's type is set to the vector's element type and false is returned.
+/// Otherwise err_builtin_invalid_arg_type is reported (or an earlier check
+/// failed) and the result signals failure to the caller.
+bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) {
+  if (checkArgCount(*this, TheCall, 1))
+    return true;
+
+  ExprResult Converted = UsualUnaryConversions(TheCall->getArg(0));
+  if (Converted.isInvalid())
+    return true;
+
+  // Store the converted operand back so later stages see it.
+  Expr *Arg = Converted.get();
+  TheCall->setArg(0, Arg);
+
+  if (const auto *VecTy = Arg->getType()->getAs<VectorType>()) {
+    // The reduction yields a single element of the vector.
+    TheCall->setType(VecTy->getElementType());
+    return false;
+  }
+
+  // Not a vector: diagnose at the start of the (converted) argument.
+  return Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+         << 1 << /* vector ty*/ 4 << Arg->getType();
+}
+
ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall,
ExprResult CallResult) {
if (checkArgCount(*this, TheCall, 1))
diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
new file mode 100644
index 0000000000000..417caed494d90
--- /dev/null
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef short int si8 __attribute__((ext_vector_type(8)));
+typedef unsigned int u4 __attribute__((ext_vector_type(4)));
+
+__attribute__((address_space(1))) float4 vf1_as_one;
+
+void test_builtin_reduce_max(float4 vf1, si8 vi1, u4 vu1) { // IR expectations for __builtin_reduce_max on float, signed and unsigned vectors
+ // CHECK-LABEL: define void @test_builtin_reduce_max(
+ // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+ // CHECK-NEXT: call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1]])
+ float r1 = __builtin_reduce_max(vf1); // float elements -> fmax reduction
+
+ // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+ // CHECK-NEXT: call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[VI1]])
+ short r2 = __builtin_reduce_max(vi1); // signed 16-bit elements -> smax reduction
+
+ // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+ // CHECK-NEXT: call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[VU1]])
+ unsigned r3 = __builtin_reduce_max(vu1); // unsigned elements -> umax reduction
+
+ // CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16
+ // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1_AS1]])
+ // CHECK-NEXT: fpext float [[RDX1]] to double
+ const double r4 = __builtin_reduce_max(vf1_as_one); // operand lives in address space 1; float result is extended to double
+
+ // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
+ // CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[CVI1]])
+ // CHECK-NEXT: sext i16 [[RDX2]] to i64
+ const si8 cvi1 = vi1; // const-qualified vector operand
+ unsigned long long r5 = __builtin_reduce_max(cvi1); // i16 result is sign-extended to the 64-bit destination
+}
+
+void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) { // IR expectations for __builtin_reduce_min on float, signed and unsigned vectors
+ // CHECK-LABEL: define void @test_builtin_reduce_min(
+ // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
+ // CHECK-NEXT: call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1]])
+ float r1 = __builtin_reduce_min(vf1); // float elements -> fmin reduction
+
+ // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+ // CHECK-NEXT: call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[VI1]])
+ short r2 = __builtin_reduce_min(vi1); // signed 16-bit elements -> smin reduction
+
+ // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+ // CHECK-NEXT: call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[VU1]])
+ unsigned r3 = __builtin_reduce_min(vu1); // unsigned elements -> umin reduction
+
+ // CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16
+ // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1_AS1]])
+ // CHECK-NEXT: fpext float [[RDX1]] to double
+ const double r4 = __builtin_reduce_min(vf1_as_one); // operand lives in address space 1; float result is extended to double
+
+ // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
+ // CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[CVI1]])
+ // CHECK-NEXT: sext i16 [[RDX2]] to i64
+ const si8 cvi1 = vi1; // const-qualified vector operand
+ unsigned long long r5 = __builtin_reduce_min(cvi1); // i16 result is sign-extended to the 64-bit destination
+}
diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
new file mode 100644
index 0000000000000..0d1aecaa99c30
--- /dev/null
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 %s -pedantic -verify -triple=x86_64-apple-darwin9
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int3 __attribute__((ext_vector_type(3)));
+typedef unsigned unsigned4 __attribute__((ext_vector_type(4)));
+
+struct Foo {
+ char *p;
+};
+
+// Negative tests for __builtin_reduce_max: every statement below is
+// ill-formed, and the directive on the line after it names the diagnostic
+// that -verify must see for the preceding line.
+void test_builtin_reduce_max(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_max(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_max(v, v);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_max();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_max(i);
+  // expected-error@-1 {{1st argument must be a vector type (was 'int')}}
+}
+
+// Negative tests for __builtin_reduce_min: every statement below is
+// ill-formed, and the directive on the line after it names the diagnostic
+// that -verify must see for the preceding line.
+void test_builtin_reduce_min(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_min(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_min(v, v);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_min();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_min(i);
+  // expected-error@-1 {{1st argument must be a vector type (was 'int')}}
+}
More information about the cfe-commits
mailing list