[flang-commits] [flang] [flang][OpenACC] Fix reduction init value for minnumf/minimumf/maxnumf/maximumf (PR #187647)

Fri Mar 20 10:02:47 PDT 2026

https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/187647

>From 9a0cb90fa1d889cfff684123d5a17f566cfb73ec Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Fri, 20 Mar 2026 00:17:00 -0700
Subject: [PATCH 1/3] [flang][OpenACC] Fix reduction init value for
 minnumf/minimumf/maxnumf/maximumf

The reduction recipe init region was producing 0.0 instead of the
correct identity value (largest representable float for min, smallest
for max) when the reduction operator was AccMinnumf, AccMinimumf,
AccMaxnumf, or AccMaximumf. Only AccMin and AccMax were handled,
causing the new operator variants to fall through to the final else
branch, which returns 0.

This caused GPU min reductions over positive data to always produce
0.0, since min(x, 0.0) = 0.0 for all positive x.

Made-with: Cursor
---
 .../Optimizer/OpenACC/Support/FIROpenACCUtils.cpp  | 14 +++++++++++---
 flang/test/Lower/OpenACC/acc-reduction-maxmin.f90  |  2 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
index a53ea9216f7ab..dfe2641f3f80e 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
@@ -274,7 +274,9 @@ std::string fir::acc::getRecipeName(mlir::acc::RecipeKind kind, Type type,
 /// Get the initial value for reduction operator.
 template <typename R>
 static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
-  if (op == mlir::acc::ReductionOperator::AccMin) {
+  if (op == mlir::acc::ReductionOperator::AccMin ||
+      op == mlir::acc::ReductionOperator::AccMinnumf ||
+      op == mlir::acc::ReductionOperator::AccMinimumf) {
     // min init value -> largest
     if constexpr (std::is_same_v<R, llvm::APInt>) {
       assert(ty.isIntOrIndex() && "expect integer or index type");
@@ -286,7 +288,9 @@ static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
       return llvm::APFloat::getLargest(floatTy.getFloatSemantics(),
                                        /*negative=*/false);
     }
-  } else if (op == mlir::acc::ReductionOperator::AccMax) {
+  } else if (op == mlir::acc::ReductionOperator::AccMax ||
+             op == mlir::acc::ReductionOperator::AccMaxnumf ||
+             op == mlir::acc::ReductionOperator::AccMaximumf) {
     // max init value -> smallest
     if constexpr (std::is_same_v<R, llvm::APInt>) {
       assert(ty.isIntOrIndex() && "expect integer or index type");
@@ -348,7 +352,11 @@ static mlir::Value getReductionInitValue(fir::FirOpBuilder &builder,
         builder, loc, ty,
         builder.getIntegerAttr(ty, getReductionInitValue<llvm::APInt>(op, ty)));
   if (op == mlir::acc::ReductionOperator::AccMin ||
-      op == mlir::acc::ReductionOperator::AccMax) {
+      op == mlir::acc::ReductionOperator::AccMinnumf ||
+      op == mlir::acc::ReductionOperator::AccMinimumf ||
+      op == mlir::acc::ReductionOperator::AccMax ||
+      op == mlir::acc::ReductionOperator::AccMaxnumf ||
+      op == mlir::acc::ReductionOperator::AccMaximumf) {
     if (mlir::isa<mlir::ComplexType>(ty))
       llvm::report_fatal_error(
           "min/max reduction not supported for complex type");
diff --git a/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90 b/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90
index 2d0746decf29d..cd6e3fd2fb07b 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-maxmin.f90
@@ -61,6 +61,7 @@ end subroutine acc_array_reduction_min
 ! EXTREMUM:             %[[MINIMUMF_0:.*]] = arith.minimumf %{{.*}}, %{{.*}} fastmath<contract> : f32
 
 ! EXTREMUM-LABEL:   acc.reduction.recipe @reduction_minimumf_ref_f32 : !fir.ref<f32> reduction_operator <minimumf> init {
+! EXTREMUM:           %[[CST:.*]] = arith.constant 3.40282347E+38 : f32
 ! EXTREMUM:         } combiner {
 ! EXTREMUM:           %[[MINIMUMF_0:.*]] = arith.minimumf %{{.*}}, %{{.*}} fastmath<contract> : f32
 
@@ -79,6 +80,7 @@ end subroutine acc_array_reduction_min
 ! EXTREMENUM:             %[[MINNUMF_0:.*]] = arith.minnumf %{{.*}}, %{{.*}} fastmath<contract> : f32
 
 ! EXTREMENUM-LABEL:   acc.reduction.recipe @reduction_minnumf_ref_f32 : !fir.ref<f32> reduction_operator <minnumf> init {
+! EXTREMENUM:           %[[CST:.*]] = arith.constant 3.40282347E+38 : f32
 ! EXTREMENUM:         } combiner {
 ! EXTREMENUM:           %[[MINNUMF_0:.*]] = arith.minnumf %{{.*}}, %{{.*}} fastmath<contract> : f32
 

>From a91064b9bc4976ee00ca77676183470383c5f202 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Fri, 20 Mar 2026 09:25:22 -0700
Subject: [PATCH 2/3] [flang][OpenACC] Fix reduction init value for
 minnumf/minimumf/maxnumf/maximumf

The reduction recipe init region was producing 0.0 instead of the
correct identity value (largest representable float for min, smallest
for max) when the reduction operator was AccMinnumf, AccMinimumf,
AccMaxnumf, or AccMaximumf. Only AccMin and AccMax were handled,
causing the new operator variants to fall through to the final else
branch, which returns 0.

This caused GPU min reductions over positive data to always produce
0.0, since min(x, 0.0) = 0.0 for all positive x.

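Refactor the if/else chain to a switch statement that names each
supported operator, with the intent that the compiler can warn about
unhandled enum values when new operators are added. Note that the
switch still has a default case, which suppresses that warning;
unhandled operators reach llvm_unreachable instead of silently
getting 0.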

Made-with: Cursor
---
 .../OpenACC/Support/FIROpenACCUtils.cpp       | 33 +++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
index dfe2641f3f80e..f600fba0864f2 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
@@ -274,9 +274,10 @@ std::string fir::acc::getRecipeName(mlir::acc::RecipeKind kind, Type type,
 /// Get the initial value for reduction operator.
 template <typename R>
 static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
-  if (op == mlir::acc::ReductionOperator::AccMin ||
-      op == mlir::acc::ReductionOperator::AccMinnumf ||
-      op == mlir::acc::ReductionOperator::AccMinimumf) {
+  switch (op) {
+  case mlir::acc::ReductionOperator::AccMin:
+  case mlir::acc::ReductionOperator::AccMinnumf:
+  case mlir::acc::ReductionOperator::AccMinimumf:
     // min init value -> largest
     if constexpr (std::is_same_v<R, llvm::APInt>) {
       assert(ty.isIntOrIndex() && "expect integer or index type");
@@ -288,9 +289,10 @@ static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
       return llvm::APFloat::getLargest(floatTy.getFloatSemantics(),
                                        /*negative=*/false);
     }
-  } else if (op == mlir::acc::ReductionOperator::AccMax ||
-             op == mlir::acc::ReductionOperator::AccMaxnumf ||
-             op == mlir::acc::ReductionOperator::AccMaximumf) {
+    break;
+  case mlir::acc::ReductionOperator::AccMax:
+  case mlir::acc::ReductionOperator::AccMaxnumf:
+  case mlir::acc::ReductionOperator::AccMaximumf:
     // max init value -> smallest
     if constexpr (std::is_same_v<R, llvm::APInt>) {
       assert(ty.isIntOrIndex() && "expect integer or index type");
@@ -302,30 +304,35 @@ static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
       return llvm::APFloat::getSmallest(floatTy.getFloatSemantics(),
                                         /*negative=*/true);
     }
-  } else if (op == mlir::acc::ReductionOperator::AccIand) {
+    break;
+  case mlir::acc::ReductionOperator::AccIand:
     if constexpr (std::is_same_v<R, llvm::APInt>) {
       assert(ty.isIntOrIndex() && "expect integer type");
       unsigned bits = ty.getIntOrFloatBitWidth();
       return llvm::APInt::getAllOnes(bits);
     }
-  } else {
-    assert(op != mlir::acc::ReductionOperator::AccNone);
-    // +, ior, ieor init value -> 0
-    // * init value -> 1
+    break;
+  case mlir::acc::ReductionOperator::AccAdd:
+  case mlir::acc::ReductionOperator::AccMul:
+  case mlir::acc::ReductionOperator::AccIor:
+  case mlir::acc::ReductionOperator::AccXor: {
+    // +, ior, ieor init value -> 0; * init value -> 1
     int64_t value = (op == mlir::acc::ReductionOperator::AccMul) ? 1 : 0;
     if constexpr (std::is_same_v<R, llvm::APInt>) {
       assert(ty.isIntOrIndex() && "expect integer or index type");
       return llvm::APInt(ty.getIntOrFloatBitWidth(), value, true);
     }
-
     if constexpr (std::is_same_v<R, llvm::APFloat>) {
       assert(mlir::isa<mlir::FloatType>(ty) && "expect float type");
       auto floatTy = mlir::dyn_cast<mlir::FloatType>(ty);
       return llvm::APFloat(floatTy.getFloatSemantics(), value);
     }
-
     if constexpr (std::is_same_v<R, int64_t>)
       return value;
+    break;
+  }
+  default:
+    llvm_unreachable("OpenACC reduction unsupported operator");
   }
   llvm_unreachable("OpenACC reduction unsupported type");
 }

>From 50966792e0b73dded3ff529513203699fac1e2fd Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Fri, 20 Mar 2026 10:02:35 -0700
Subject: [PATCH 3/3] [flang][OpenACC] Fix reduction init value for
 minnumf/minimumf/maxnumf/maximumf

The reduction recipe init region was producing 0.0 instead of the
correct identity value (largest finite float for min, most negative
finite float for max) when the reduction operator was AccMinnumf,
AccMinimumf, AccMaxnumf, or AccMaximumf. Only AccMin and AccMax were
handled, causing the new operator variants to fall through to the else
branch which returns 0.

This caused GPU min reductions over positive data to always produce
0.0, since min(x, 0.0) = 0.0 for all positive x.

Replace the duplicated identity value logic with
arith::getIdentityValue, using a mapping from acc::ReductionOperator
to arith::AtomicRMWKind. Use minimumf/maximumf (which respect
useOnlyFiniteValue) instead of minnumf/maxnumf (whose MLIR identity
is NaN) to get correct finite identity values.

Made-with: Cursor
---
 .../OpenACC/Support/FIROpenACCUtils.cpp       | 122 ++++++------------
 1 file changed, 37 insertions(+), 85 deletions(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
index f600fba0864f2..3a8f548fefdf6 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
@@ -271,70 +271,41 @@ std::string fir::acc::getRecipeName(mlir::acc::RecipeKind kind, Type type,
   return ::getRecipeName(kind, type, kindMap, bounds, reductionOp);
 }
 
-/// Get the initial value for reduction operator.
-template <typename R>
-static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
+/// Map acc::ReductionOperator to arith::AtomicRMWKind for identity value
+/// computation. Uses minimumf/maximumf instead of minnumf/maxnumf because
+/// arith::getIdentityValueAttr for minnumf/maxnumf returns NaN (the IEEE 754
+/// identity), which doesn't work with comparison-based reductions on GPU.
+/// minimumf/maximumf identity with useOnlyFiniteValue gives the correct
+/// finite extreme value (FLT_MAX / -FLT_MAX).
+static mlir::arith::AtomicRMWKind
+getAtomicRMWKindForIdentity(mlir::acc::ReductionOperator op, mlir::Type ty) {
+  bool isFloat = mlir::isa<mlir::FloatType>(ty);
   switch (op) {
+  case mlir::acc::ReductionOperator::AccAdd:
+    return isFloat ? mlir::arith::AtomicRMWKind::addf
+                   : mlir::arith::AtomicRMWKind::addi;
+  case mlir::acc::ReductionOperator::AccMul:
+    return isFloat ? mlir::arith::AtomicRMWKind::mulf
+                   : mlir::arith::AtomicRMWKind::muli;
   case mlir::acc::ReductionOperator::AccMin:
   case mlir::acc::ReductionOperator::AccMinnumf:
   case mlir::acc::ReductionOperator::AccMinimumf:
-    // min init value -> largest
-    if constexpr (std::is_same_v<R, llvm::APInt>) {
-      assert(ty.isIntOrIndex() && "expect integer or index type");
-      return llvm::APInt::getSignedMaxValue(ty.getIntOrFloatBitWidth());
-    }
-    if constexpr (std::is_same_v<R, llvm::APFloat>) {
-      auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty);
-      assert(floatTy && "expect float type");
-      return llvm::APFloat::getLargest(floatTy.getFloatSemantics(),
-                                       /*negative=*/false);
-    }
-    break;
+    return isFloat ? mlir::arith::AtomicRMWKind::minimumf
+                   : mlir::arith::AtomicRMWKind::mins;
   case mlir::acc::ReductionOperator::AccMax:
   case mlir::acc::ReductionOperator::AccMaxnumf:
   case mlir::acc::ReductionOperator::AccMaximumf:
-    // max init value -> smallest
-    if constexpr (std::is_same_v<R, llvm::APInt>) {
-      assert(ty.isIntOrIndex() && "expect integer or index type");
-      return llvm::APInt::getSignedMinValue(ty.getIntOrFloatBitWidth());
-    }
-    if constexpr (std::is_same_v<R, llvm::APFloat>) {
-      auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty);
-      assert(floatTy && "expect float type");
-      return llvm::APFloat::getSmallest(floatTy.getFloatSemantics(),
-                                        /*negative=*/true);
-    }
-    break;
+    return isFloat ? mlir::arith::AtomicRMWKind::maximumf
+                   : mlir::arith::AtomicRMWKind::maxs;
   case mlir::acc::ReductionOperator::AccIand:
-    if constexpr (std::is_same_v<R, llvm::APInt>) {
-      assert(ty.isIntOrIndex() && "expect integer type");
-      unsigned bits = ty.getIntOrFloatBitWidth();
-      return llvm::APInt::getAllOnes(bits);
-    }
-    break;
-  case mlir::acc::ReductionOperator::AccAdd:
-  case mlir::acc::ReductionOperator::AccMul:
+    return mlir::arith::AtomicRMWKind::andi;
   case mlir::acc::ReductionOperator::AccIor:
-  case mlir::acc::ReductionOperator::AccXor: {
-    // +, ior, ieor init value -> 0; * init value -> 1
-    int64_t value = (op == mlir::acc::ReductionOperator::AccMul) ? 1 : 0;
-    if constexpr (std::is_same_v<R, llvm::APInt>) {
-      assert(ty.isIntOrIndex() && "expect integer or index type");
-      return llvm::APInt(ty.getIntOrFloatBitWidth(), value, true);
-    }
-    if constexpr (std::is_same_v<R, llvm::APFloat>) {
-      assert(mlir::isa<mlir::FloatType>(ty) && "expect float type");
-      auto floatTy = mlir::dyn_cast<mlir::FloatType>(ty);
-      return llvm::APFloat(floatTy.getFloatSemantics(), value);
-    }
-    if constexpr (std::is_same_v<R, int64_t>)
-      return value;
-    break;
-  }
+    return mlir::arith::AtomicRMWKind::ori;
+  case mlir::acc::ReductionOperator::AccXor:
+    return mlir::arith::AtomicRMWKind::xori;
   default:
-    llvm_unreachable("OpenACC reduction unsupported operator");
+    llvm_unreachable("unsupported acc::ReductionOperator");
   }
-  llvm_unreachable("OpenACC reduction unsupported type");
 }
 
 /// Return a constant with the initial value for the reduction operator and
@@ -348,43 +319,24 @@ static mlir::Value getReductionInitValue(fir::FirOpBuilder &builder,
       op == mlir::acc::ReductionOperator::AccEqv ||
       op == mlir::acc::ReductionOperator::AccNeqv) {
     assert(mlir::isa<fir::LogicalType>(ty) && "expect fir.logical type");
-    bool value = true; // .true. for .and. and .eqv.
-    if (op == mlir::acc::ReductionOperator::AccLor ||
-        op == mlir::acc::ReductionOperator::AccNeqv)
-      value = false; // .false. for .or. and .neqv.
+    bool value = (op == mlir::acc::ReductionOperator::AccLand ||
+                  op == mlir::acc::ReductionOperator::AccEqv);
     return builder.createBool(loc, value);
   }
-  if (ty.isIntOrIndex())
-    return mlir::arith::ConstantOp::create(
-        builder, loc, ty,
-        builder.getIntegerAttr(ty, getReductionInitValue<llvm::APInt>(op, ty)));
-  if (op == mlir::acc::ReductionOperator::AccMin ||
-      op == mlir::acc::ReductionOperator::AccMinnumf ||
-      op == mlir::acc::ReductionOperator::AccMinimumf ||
-      op == mlir::acc::ReductionOperator::AccMax ||
-      op == mlir::acc::ReductionOperator::AccMaxnumf ||
-      op == mlir::acc::ReductionOperator::AccMaximumf) {
-    if (mlir::isa<mlir::ComplexType>(ty))
-      llvm::report_fatal_error(
-          "min/max reduction not supported for complex type");
-    if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty))
-      return mlir::arith::ConstantOp::create(
-          builder, loc, ty,
-          builder.getFloatAttr(ty,
-                               getReductionInitValue<llvm::APFloat>(op, ty)));
-  } else if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty)) {
-    return mlir::arith::ConstantOp::create(
-        builder, loc, ty,
-        builder.getFloatAttr(ty, getReductionInitValue<int64_t>(op, ty)));
-  } else if (auto cmplxTy = mlir::dyn_cast_or_null<mlir::ComplexType>(ty)) {
-    mlir::Type floatTy = cmplxTy.getElementType();
-    mlir::Value realInit = builder.createRealConstant(
-        loc, floatTy, getReductionInitValue<int64_t>(op, cmplxTy));
-    mlir::Value imagInit = builder.createRealConstant(loc, floatTy, 0.0);
+  if (auto cmplxTy = mlir::dyn_cast<mlir::ComplexType>(ty)) {
+    mlir::arith::AtomicRMWKind kind =
+        getAtomicRMWKindForIdentity(op, cmplxTy.getElementType());
+    mlir::Value realInit = mlir::arith::getIdentityValue(
+        kind, cmplxTy.getElementType(), builder, loc,
+        /*useOnlyFiniteValue=*/true);
+    mlir::Value imagInit =
+        builder.createRealConstant(loc, cmplxTy.getElementType(), 0.0);
     return fir::factory::Complex{builder, loc}.createComplex(cmplxTy, realInit,
                                                              imagInit);
   }
-  llvm::report_fatal_error("Unsupported OpenACC reduction type");
+  mlir::arith::AtomicRMWKind kind = getAtomicRMWKindForIdentity(op, ty);
+  return mlir::arith::getIdentityValue(kind, ty, builder, loc,
+                                       /*useOnlyFiniteValue=*/true);
 }
 
 static llvm::SmallVector<mlir::Value>