[flang-commits] [flang] [flang][runtime] Address PRODUCT numeric discrepancy, folding vs runtime (PR #90125)
Peter Klausler via flang-commits
flang-commits at lists.llvm.org
Thu Apr 25 14:17:15 PDT 2024
https://github.com/klausler created https://github.com/llvm/llvm-project/pull/90125
Ensure that the runtime implementations of floating-point reductions use intermediate results of the same precision as the operands, so that results match those from constant folding. (SUM reduction uses Kahan summation in both cases.)
>From ab75b863e7194cf37c8656f8df7fdf1080393af6 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Thu, 25 Apr 2024 14:13:53 -0700
Subject: [PATCH] [flang][runtime] Address PRODUCT numeric discrepancy, folding
vs runtime
Ensure that the runtime implementations of floating-point reductions
use intermediate results of the same precision as the operands, so
that results match those from constant folding. (SUM reduction uses
Kahan summation in both cases.)
---
flang/runtime/product.cpp | 8 ++++----
flang/runtime/reduction-templates.h | 11 +++++------
flang/runtime/sum.cpp | 3 ++-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/flang/runtime/product.cpp b/flang/runtime/product.cpp
index 4c3b8c33a12e0f..7fc0fcd3b107de 100644
--- a/flang/runtime/product.cpp
+++ b/flang/runtime/product.cpp
@@ -107,7 +107,7 @@ CppTypeFor<TypeCategory::Integer, 16> RTDEF(ProductInteger16)(
CppTypeFor<TypeCategory::Real, 4> RTDEF(ProductReal4)(const Descriptor &x,
const char *source, int line, int dim, const Descriptor *mask) {
return GetTotalReduction<TypeCategory::Real, 4>(x, source, line, dim, mask,
- NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+ NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 4>>{x},
"PRODUCT");
}
CppTypeFor<TypeCategory::Real, 8> RTDEF(ProductReal8)(const Descriptor &x,
@@ -137,7 +137,7 @@ void RTDEF(CppProductComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
const Descriptor &x, const char *source, int line, int dim,
const Descriptor *mask) {
result = GetTotalReduction<TypeCategory::Complex, 4>(x, source, line, dim,
- mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+ mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 4>>{x},
"PRODUCT");
}
void RTDEF(CppProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
@@ -169,8 +169,8 @@ void RTDEF(CppProductComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
void RTDEF(ProductDim)(Descriptor &result, const Descriptor &x, int dim,
const char *source, int line, const Descriptor *mask) {
TypedPartialNumericReduction<NonComplexProductAccumulator,
- NonComplexProductAccumulator, ComplexProductAccumulator>(
- result, x, dim, source, line, mask, "PRODUCT");
+ NonComplexProductAccumulator, ComplexProductAccumulator,
+ /*MIN_REAL_KIND=*/4>(result, x, dim, source, line, mask, "PRODUCT");
}
RT_EXT_API_GROUP_END
diff --git a/flang/runtime/reduction-templates.h b/flang/runtime/reduction-templates.h
index f8e6f6095509e6..d102e5642547d6 100644
--- a/flang/runtime/reduction-templates.h
+++ b/flang/runtime/reduction-templates.h
@@ -240,11 +240,10 @@ inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
kind, terminator, result, x, dim, mask, terminator, intrinsic);
}
-template <TypeCategory CAT, template <typename> class ACCUM>
+template <TypeCategory CAT, template <typename> class ACCUM, int MIN_KIND>
struct PartialFloatingReductionHelper {
template <int KIND> struct Functor {
- static constexpr int Intermediate{
- std::max(KIND, 8)}; // use at least "double" for intermediate results
+ static constexpr int Intermediate{std::max(KIND, MIN_KIND)};
RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
int dim, const Descriptor *mask, Terminator &terminator,
const char *intrinsic) const {
@@ -260,7 +259,7 @@ struct PartialFloatingReductionHelper {
template <template <typename> class INTEGER_ACCUM,
template <typename> class REAL_ACCUM,
- template <typename> class COMPLEX_ACCUM>
+ template <typename> class COMPLEX_ACCUM, int MIN_REAL_KIND>
inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
const Descriptor &x, int dim, const char *source, int line,
const Descriptor *mask, const char *intrinsic) {
@@ -274,13 +273,13 @@ inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
break;
case TypeCategory::Real:
ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
- REAL_ACCUM>::template Functor,
+ REAL_ACCUM, MIN_REAL_KIND>::template Functor,
void>(catKind->second, terminator, result, x, dim, mask, terminator,
intrinsic);
break;
case TypeCategory::Complex:
ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
- COMPLEX_ACCUM>::template Functor,
+ COMPLEX_ACCUM, MIN_REAL_KIND>::template Functor,
void>(catKind->second, terminator, result, x, dim, mask, terminator,
intrinsic);
break;
diff --git a/flang/runtime/sum.cpp b/flang/runtime/sum.cpp
index d2495e3e956fe6..1d64554c2785dc 100644
--- a/flang/runtime/sum.cpp
+++ b/flang/runtime/sum.cpp
@@ -188,7 +188,8 @@ void RTDEF(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
void RTDEF(SumDim)(Descriptor &result, const Descriptor &x, int dim,
const char *source, int line, const Descriptor *mask) {
TypedPartialNumericReduction<IntegerSumAccumulator, RealSumAccumulator,
- ComplexSumAccumulator>(result, x, dim, source, line, mask, "SUM");
+ ComplexSumAccumulator, /*MIN_REAL_KIND=*/4>(
+ result, x, dim, source, line, mask, "SUM");
}
RT_EXT_API_GROUP_END
More information about the flang-commits
mailing list