[clang] Clang: Add minnum/maxnum builtin functions (PR #129207)
YunQiang Su via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 28 00:34:51 PST 2025
https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/129207
With https://github.com/llvm/llvm-project/pull/112852, we claimed that llvm.minnum and llvm.maxnum should treat +0.0>-0.0, while libc doesn't require fmin(3)/fmax(3) for it.
To make llvm.minnum/llvm.maxnum easy to use, we define the builtin functions for them, include
__builtin_minnum
__builtin_elementwise_minnum
__builtin_minnum
__builtin_elementwise_minnum
__builtin_minnum
__builtin_elementwise_minnum
__builtin_minnum
__builtin_maxnum
__builtin_elementwise_maxnum
__builtin_maxnum
__builtin_elementwise_maxnum
__builtin_maxnum
__builtin_elementwise_maxnum
__builtin_maxnum
All of them support _Float16, float, double, long double.
>From 917e3b80ae7962a4f0bd5735cdc88163fcdbdcc7 Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Fri, 28 Feb 2025 16:09:04 +0800
Subject: [PATCH] Clang: Add minnum/maxnum builtin functions support
With https://github.com/llvm/llvm-project/pull/112852, we claimed
that llvm.minnum and llvm.maxnum should treat +0.0>-0.0,
while libc doesn't require fmin(3)/fmax(3) for it.
To make llvm.minnum/llvm.maxnum easy to use, we define the builtin
functions for them, include
__builtin_minnum
__builtin_elementwise_minnum
__builtin_minnum
__builtin_elementwise_minnum
__builtin_minnum
__builtin_elementwise_minnum
__builtin_minnum
__builtin_maxnum
__builtin_elementwise_maxnum
__builtin_maxnum
__builtin_elementwise_maxnum
__builtin_maxnum
__builtin_elementwise_maxnum
__builtin_maxnum
All of them support _Float16, float, double, long double.
---
clang/include/clang/Basic/Builtins.td | 24 ++
clang/include/clang/Sema/Sema.h | 1 +
clang/lib/CodeGen/CGBuiltin.cpp | 26 ++
clang/lib/Sema/SemaChecking.cpp | 45 ++++
clang/test/CodeGen/builtin-maxnum-minnum.c | 269 +++++++++++++++++++++
5 files changed, 365 insertions(+)
create mode 100644 clang/test/CodeGen/builtin-maxnum-minnum.c
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 598ae171b1389..fd49180d1e58d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -209,6 +209,18 @@ def FmaxF16F128 : Builtin, F16F128MathTemplate {
let Prototype = "T(T, T)";
}
+def MinNum : Builtin {
+ let Spellings = ["__builtin_minnum"];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr];
+ let Prototype = "void(...)";
+}
+
+def MaxNum : Builtin {
+ let Spellings = ["__builtin_maxnum"];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr];
+ let Prototype = "void(...)";
+}
+
def FminF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_fmin"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
@@ -1298,6 +1310,18 @@ def ElementwiseMin : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseMaxNum : Builtin {
+ let Spellings = ["__builtin_elementwise_maxnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseMinNum : Builtin {
+ let Spellings = ["__builtin_elementwise_minnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ElementwiseMaximum : Builtin {
let Spellings = ["__builtin_elementwise_maximum"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index ebdbc69384efb..41a0b41f331de 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2569,6 +2569,7 @@ class Sema final : public SemaBase {
ExprResult AtomicOpsOverloaded(ExprResult TheCallResult,
AtomicExpr::AtomicOp Op);
+ bool BuiltinMaxNumMinNumMath(CallExpr *TheCall);
/// \param FPOnly restricts the arguments to floating-point types.
bool BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly = false);
bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall);
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65fac01d58362..cfbb7b20c33c7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3220,6 +3220,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Intrinsic::minnum,
Intrinsic::experimental_constrained_minnum));
+ case Builtin::BI__builtin_maxnum:
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
+ *this, E, Intrinsic::maxnum,
+ Intrinsic::experimental_constrained_maxnum));
+
+ case Builtin::BI__builtin_minnum:
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
+ *this, E, Intrinsic::minnum,
+ Intrinsic::experimental_constrained_minnum));
+
case Builtin::BIfmaximum_num:
case Builtin::BIfmaximum_numf:
case Builtin::BIfmaximum_numl:
@@ -4398,6 +4408,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}
+ case Builtin::BI__builtin_elementwise_maxnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maxnum, Op0,
+ Op1, nullptr, "elt.maxnum");
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_elementwise_minnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minnum, Op0,
+ Op1, nullptr, "elt.minnum");
+ return RValue::get(Result);
+ }
+
case Builtin::BI__builtin_elementwise_maximum: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f9926c6b4adab..d23b7368bd653 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2753,8 +2753,17 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
+ case Builtin::BI__builtin_minnum:
+ case Builtin::BI__builtin_maxnum: {
+ if (BuiltinMaxNumMinNumMath(TheCall))
+ return ExprError();
+ break;
+ }
+
// These builtins restrict the element type to floating point
// types only, and take in two arguments.
+ case Builtin::BI__builtin_elementwise_minnum:
+ case Builtin::BI__builtin_elementwise_maxnum:
case Builtin::BI__builtin_elementwise_minimum:
case Builtin::BI__builtin_elementwise_maximum:
case Builtin::BI__builtin_elementwise_atan2:
@@ -15234,6 +15243,42 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
return false;
}
+bool Sema::BuiltinMaxNumMinNumMath(CallExpr *TheCall) {
+ if (checkArgCount(TheCall, 2))
+ return true;
+
+ ExprResult OrigArg0 = TheCall->getArg(0);
+ ExprResult OrigArg1 = TheCall->getArg(1);
+
+ // Do standard promotions between the two arguments, returning their common
+ // type.
+ QualType Res = UsualArithmeticConversions(
+ OrigArg0, OrigArg1, TheCall->getExprLoc(), ACK_Comparison);
+ if (OrigArg0.isInvalid() || OrigArg1.isInvalid())
+ return true;
+
+ // Make sure any conversions are pushed back into the call; this is
+ // type safe since unordered compare builtins are declared as "_Bool
+ // foo(...)".
+ TheCall->setArg(0, OrigArg0.get());
+ TheCall->setArg(1, OrigArg1.get());
+
+ if (!OrigArg0.get()->isTypeDependent() && OrigArg1.get()->isTypeDependent())
+ return true;
+
+ // If the common type isn't a real floating type, then the arguments were
+ // invalid for this operation.
+ if (Res.isNull() || !Res->isRealFloatingType())
+ return Diag(OrigArg0.get()->getBeginLoc(),
+ diag::err_typecheck_call_invalid_ordered_compare)
+ << OrigArg0.get()->getType() << OrigArg1.get()->getType()
+ << SourceRange(OrigArg0.get()->getBeginLoc(),
+ OrigArg1.get()->getEndLoc());
+
+ TheCall->setType(Res);
+ return false;
+}
+
bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
if (auto Res = BuiltinVectorMath(TheCall, FPOnly); Res.has_value()) {
TheCall->setType(*Res);
diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c
new file mode 100644
index 0000000000000..3971ec644ffc5
--- /dev/null
+++ b/clang/test/CodeGen/builtin-maxnum-minnum.c
@@ -0,0 +1,269 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK
+
+typedef _Float16 half8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef double double2 __attribute__((ext_vector_type(2)));
+typedef long double ldouble2 __attribute__((ext_vector_type(2)));
+
+// CHECK-LABEL: define dso_local half @fmin16(
+// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.minnum.f16(half [[TMP0]], half [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
+//
+_Float16 fmin16(_Float16 a, _Float16 b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <8 x half> @pfmin16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]]
+//
+half8 pfmin16(half8 a, half8 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local float @fmin32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[TMP0]], float [[TMP1]])
+// CHECK-NEXT: ret float [[TMP2]]
+//
+float fmin32(float a, float b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <4 x float> @pfmin32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]]
+//
+float4 pfmin32(float4 a, float4 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local double @fmin64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[TMP0]], double [[TMP1]])
+// CHECK-NEXT: ret double [[TMP2]]
+//
+double fmin64(double a, double b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <2 x double> @pfmin64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]]
+//
+double2 pfmin64(double2 a, double2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local x86_fp80 @fmin80(
+// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.minnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]])
+// CHECK-NEXT: ret x86_fp80 [[TMP2]]
+//
+long double fmin80(long double a, long double b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <2 x x86_fp80> @pfmin80(
+// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
+// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]]
+//
+ldouble2 pfmin80(ldouble2 a, ldouble2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local half @fmax16(
+// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.maxnum.f16(half [[TMP0]], half [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
+//
+_Float16 fmax16(_Float16 a, _Float16 b) {
+ return __builtin_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local <8 x half> @pfmax16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]]
+//
+half8 pfmax16(half8 a, half8 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local float @fmax32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[TMP0]], float [[TMP1]])
+// CHECK-NEXT: ret float [[TMP2]]
+//
+float fmax32(float a, float b) {
+ return __builtin_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local <4 x float> @pfmax32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]]
+//
+float4 pfmax32(float4 a, float4 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local double @fmax64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[TMP0]], double [[TMP1]])
+// CHECK-NEXT: ret double [[TMP2]]
+//
+double fmax64(double a, double b) {
+ return __builtin_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local <2 x double> @pfmax64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]]
+//
+double2 pfmax64(double2 a, double2 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local x86_fp80 @fmax80(
+// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.maxnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]])
+// CHECK-NEXT: ret x86_fp80 [[TMP2]]
+//
+long double fmax80(long double a, long double b) {
+ return __builtin_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local <2 x x86_fp80> @pfmax80(
+// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
+// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]]
+//
+ldouble2 pfmax80(ldouble2 a, ldouble2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+
+//.
+// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"_Float16", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0}
+// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+// CHECK: [[META8]] = !{!"float", [[META4]], i64 0}
+// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+// CHECK: [[META10]] = !{!"double", [[META4]], i64 0}
+// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"long double", [[META4]], i64 0}
+//.
More information about the cfe-commits
mailing list