[flang-commits] [flang] [flang] add simplification for ProductOp intrinsic (PR #169575)

Mon Dec 8 10:14:24 PST 2025

https://github.com/eugeneepshteyn updated https://github.com/llvm/llvm-project/pull/169575

>From f7daa182904cd19e9e2b92ea41d3bf6930179f87 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Tue, 25 Nov 2025 21:57:14 +0000
Subject: [PATCH 01/11] add simplification for ProductOp intrinsic

---
 .../Transforms/SimplifyHLFIRIntrinsics.cpp    | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index ce8ebaa803f47..67b43a346747d 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -23,6 +23,7 @@
 #include "mlir/IR/Location.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include <type_traits>
 
 namespace hlfir {
 #define GEN_PASS_DEF_SIMPLIFYHLFIRINTRINSICS
@@ -931,6 +932,43 @@ class SumAsElementalConverter
   mlir::Value genScalarAdd(mlir::Value value1, mlir::Value value2);
 };
 
+/// Reduction converter for hlfir.product (the PRODUCT intrinsic).
+class ProductAsElementalConverter
+    : public NumericReductionAsElementalConverterBase<hlfir::ProductOp> {
+  using Base = NumericReductionAsElementalConverterBase;
+
+public:
+  ProductAsElementalConverter(hlfir::ProductOp op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+
+private:
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+      [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+      final {
+    return {
+        // check element type, and use
+        // fir::factory::create{Integer or Real}Constant 
+        fir::factory::createZeroValue(builder, loc, getResultElementType())};
+  }
+  virtual llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
+    checkReductions(currentValue);
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    // NOTE: this is the simple version: the running product is multiplied
+    // by the loaded element as is, with no attempt to improve accuracy or
+    // to detect overflow of the intermediate result.
+    return {genScalarMult(currentValue[0], elementValue)};
+  }
+
+  // Generate scalar multiplication of the two values (of the same data type).
+  mlir::Value genScalarMult(mlir::Value value1, mlir::Value value2);
+};
+
 /// Base class for logical reductions like ALL, ANY, COUNT.
 /// They do not have MASK and FastMathFlags.
 template <typename OpT>
@@ -1194,6 +1232,20 @@ mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1,
   llvm_unreachable("unsupported SUM reduction type");
 }
 
+mlir::Value ProductAsElementalConverter::genScalarMult(mlir::Value value1,
+                                                  mlir::Value value2) {
+  mlir::Type ty = value1.getType();
+  assert(ty == value2.getType() && "reduction values' types do not match");
+  if (mlir::isa<mlir::FloatType>(ty))
+    return mlir::arith::MulFOp::create(builder, loc, value1, value2);
+  if (mlir::isa<mlir::ComplexType>(ty))
+    return fir::MulcOp::create(builder, loc, value1, value2);
+  if (mlir::isa<mlir::IntegerType>(ty))
+    return mlir::arith::MulIOp::create(builder, loc, value1, value2);
+  // Float, complex and integer are the only element types handled above.
+  llvm_unreachable("unsupported PRODUCT reduction type");
+}
+
 mlir::Value ReductionAsElementalConverter::genMaskValue(
     mlir::Value mask, mlir::Value isPresentPred, mlir::ValueRange indices) {
   mlir::OpBuilder::InsertionGuard guard(builder);
@@ -1265,6 +1317,9 @@ class ReductionConversion : public mlir::OpRewritePattern<Op> {
     } else if constexpr (std::is_same_v<Op, hlfir::SumOp>) {
       SumAsElementalConverter converter{op, rewriter};
       return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::ProductOp>) {
+      ProductAsElementalConverter converter{op, rewriter};
+      return converter.convert();
     }
     return rewriter.notifyMatchFailure(op, "unexpected reduction operation");
   }
@@ -3158,6 +3213,7 @@ class SimplifyHLFIRIntrinsics
     mlir::RewritePatternSet patterns(context);
     patterns.insert<TransposeAsElementalConversion>(context);
     patterns.insert<ReductionConversion<hlfir::SumOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::ProductOp>>(context);
     patterns.insert<ArrayShiftConversion<hlfir::CShiftOp>>(context);
     patterns.insert<ArrayShiftConversion<hlfir::EOShiftOp>>(context);
     patterns.insert<CmpCharOpConversion>(context);

>From e94d55815dc9b128b8dbf499d809ec210a6770c5 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Tue, 25 Nov 2025 22:02:05 +0000
Subject: [PATCH 02/11] format

---
 .../HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp          | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 67b43a346747d..8eaf4df42d672 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -938,10 +938,10 @@ class ProductAsElementalConverter
   using Base = NumericReductionAsElementalConverterBase;
 
 public:
-  ProductAsElementalConverter(hlfir::ProductOp op, mlir::PatternRewriter &rewriter)
+  ProductAsElementalConverter(hlfir::ProductOp op,
+                              mlir::PatternRewriter &rewriter)
       : Base{op, rewriter} {}
 
-
 private:
   virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
       [[maybe_unused]] mlir::ValueRange oneBasedIndices,
@@ -949,7 +949,7 @@ class ProductAsElementalConverter
       final {
     return {
         // check element type, and use
-        // fir::factory::create{Integer or Real}Constant 
+        // fir::factory::create{Integer or Real}Constant
         fir::factory::createZeroValue(builder, loc, getResultElementType())};
   }
   virtual llvm::SmallVector<mlir::Value>
@@ -1233,7 +1233,7 @@ mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1,
 }
 
 mlir::Value ProductAsElementalConverter::genScalarMult(mlir::Value value1,
-                                                  mlir::Value value2) {
+                                                       mlir::Value value2) {
   mlir::Type ty = value1.getType();
   assert(ty == value2.getType() && "reduction values' types do not match");
   if (mlir::isa<mlir::FloatType>(ty))

>From 4ffa90bd2e3d8a83cd233b24b025417c9b06f26f Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 26 Nov 2025 14:40:05 +0000
Subject: [PATCH 03/11] add builder for creating one value

---
 .../flang/Optimizer/Builder/FIRBuilder.h      | 10 ++++++++++
 flang/lib/Optimizer/Builder/FIRBuilder.cpp    | 19 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index c586ac0ec08e3..bb85f56ffee24 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -208,6 +208,11 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
     return createRealConstant(loc, realType, 0u);
   }
 
+  /// Create a constant of the real type \p realType holding the value one.
+  mlir::Value createRealOneConstant(mlir::Location loc, mlir::Type realType) {
+    return createRealConstant(loc, realType, 1u);
+  }
+
   /// Create a slot for a local on the stack. Besides the variable's type and
   /// shape, it may be given name, pinned, or target attributes.
   mlir::Value allocateLocal(mlir::Location loc, mlir::Type ty,
@@ -856,6 +861,11 @@ mlir::Value genLenOfCharacter(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Value createZeroValue(fir::FirOpBuilder &builder, mlir::Location loc,
                             mlir::Type type);
 
+/// Create a one value of a given numerical or logical \p type (`true`
+/// for logical types).
+mlir::Value createOneValue(fir::FirOpBuilder &builder, mlir::Location loc,
+                            mlir::Type type);
+
 /// Get the integer constants of triplet and compute the extent.
 std::optional<std::int64_t> getExtentFromTriplet(mlir::Value lb, mlir::Value ub,
                                                  mlir::Value stride);
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 73ce1dc1174a0..eac1994067c2e 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1671,6 +1671,25 @@ mlir::Value fir::factory::createZeroValue(fir::FirOpBuilder &builder,
                            "numeric or logical type");
 }
 
+mlir::Value fir::factory::createOneValue(fir::FirOpBuilder &builder,
+                                         mlir::Location loc, mlir::Type type) {
+  if (mlir::isa<fir::LogicalType>(type) || type == builder.getIntegerType(1))
+    return builder.createConvert(loc, type, builder.createBool(loc, true));
+  if (fir::isa_integer(type))
+    return builder.createIntegerConstant(loc, type, 1);
+  if (fir::isa_real(type))
+    return builder.createRealOneConstant(loc, type);
+  if (fir::isa_complex(type)) {
+    fir::factory::Complex complexHelper(builder, loc);
+    mlir::Type partType = complexHelper.getComplexPartType(type);
+    mlir::Value realPart = builder.createRealOneConstant(loc, partType);
+    mlir::Value imagPart = builder.createRealZeroConstant(loc, partType);
+    return complexHelper.createComplex(type, realPart, imagPart); // (1, 0)
+  }
+  fir::emitFatalError(loc, "internal: trying to generate one value of non "
+                           "numeric or logical type");
+}
+
 std::optional<std::int64_t>
 fir::factory::getExtentFromTriplet(mlir::Value lb, mlir::Value ub,
                                    mlir::Value stride) {

>From f4d60ef49fa50205d7bc4ba8b6cdc4c0fb2bd6ee Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 26 Nov 2025 14:40:26 +0000
Subject: [PATCH 04/11] correct initial value for product simplification

---
 .../Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp    | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 8eaf4df42d672..15d24568ee136 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -948,9 +948,7 @@ class ProductAsElementalConverter
       [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
       final {
     return {
-        // check element type, and use
-        // fir::factory::create{Integer or Real}Constant
-        fir::factory::createZeroValue(builder, loc, getResultElementType())};
+        fir::factory::createOneValue(builder, loc, getResultElementType())};
   }
   virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,

>From 13bc9b280b8913f4f7773b6b4695db087e711ae0 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 26 Nov 2025 14:40:55 +0000
Subject: [PATCH 05/11] add tests for product simplification

---
 .../simplify-hlfir-intrinsics-product.fir     | 448 ++++++++++++++++++
 1 file changed, 448 insertions(+)
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir

diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
new file mode 100644
index 0000000000000..f34f4f7faf2fa
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
@@ -0,0 +1,448 @@
+// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s
+
+// box with known extents
+func.func @product_box_known_extents(%arg0: !fir.box<!fir.array<2x3xi32>>) -> !hlfir.expr<2xi32> {
+  %cst = arith.constant 2 : i32
+  %res = hlfir.product %arg0 dim %cst : (!fir.box<!fir.array<2x3xi32>>, i32) -> !hlfir.expr<2xi32>
+  return %res : !hlfir.expr<2xi32>
+}
+// CHECK-LABEL:   func.func @product_box_known_extents(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<2x3xi32>>) -> !hlfir.expr<2xi32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 0 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[CONSTANT_4:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_3]] : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<2xi32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_1]] to %[[CONSTANT_4]] step %[[CONSTANT_1]] unordered iter_args(%[[VAL_2:.*]] = %[[CONSTANT_2]]) -> (i32) {
+// CHECK:               %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<2x3xi32>>, index) -> (index, index, index)
+// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_1]] : (!fir.box<!fir.array<2x3xi32>>, index) -> (index, index, index)
+// CHECK:               %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_0]]#0, %[[CONSTANT_1]] : index
+// CHECK:               %[[ADDI_0:.*]] = arith.addi %[[VAL_0]], %[[SUBI_0]] : index
+// CHECK:               %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_1]]#0, %[[CONSTANT_1]] : index
+// CHECK:               %[[ADDI_1:.*]] = arith.addi %[[VAL_1]], %[[SUBI_1]] : index
+// CHECK:               %[[DESIGNATE_0:.*]] = hlfir.designate %[[ARG0]] (%[[ADDI_0]], %[[ADDI_1]])  : (!fir.box<!fir.array<2x3xi32>>, index, index) -> !fir.ref<i32>
+// CHECK:               %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
+// CHECK:               %[[MULI_0:.*]] = arith.muli %[[VAL_2]], %[[LOAD_0]] : i32
+// CHECK:               fir.result %[[MULI_0]] : i32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : i32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<2xi32>
+// CHECK:         }
+
+// expr with known extents
+func.func @product_expr_known_extents(%arg0: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
+  %cst = arith.constant 1 : i32
+  %res = hlfir.product %arg0 dim %cst : (!hlfir.expr<2x3xi32>, i32) -> !hlfir.expr<3xi32>
+  return %res : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @product_expr_known_extents(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_3]] : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xi32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_2]] step %[[CONSTANT_0]] unordered iter_args(%[[VAL_2:.*]] = %[[CONSTANT_1]]) -> (i32) {
+// CHECK:               %[[APPLY_0:.*]] = hlfir.apply %[[ARG0]], %[[VAL_1]], %[[VAL_0]] : (!hlfir.expr<2x3xi32>, index, index) -> i32
+// CHECK:               %[[MULI_0:.*]] = arith.muli %[[VAL_2]], %[[APPLY_0]] : i32
+// CHECK:               fir.result %[[MULI_0]] : i32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : i32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+
+// box with unknown extent
+func.func @product_box_unknown_extent1(%arg0: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<3xcomplex<f64>> {
+  %cst = arith.constant 1 : i32
+  %res = hlfir.product %arg0 dim %cst : (!fir.box<!fir.array<?x3xcomplex<f64>>>, i32) -> !hlfir.expr<3xcomplex<f64>>
+  return %res : !hlfir.expr<3xcomplex<f64>>
+}
+// CHECK-LABEL:   func.func @product_box_unknown_extent1(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<3xcomplex<f64>> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f64
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xcomplex<f64>> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[UNDEFINED_0:.*]] = fir.undefined complex<f64>
+// CHECK:             %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_0]], %[[CONSTANT_1]], [0 : index] : (complex<f64>, f64) -> complex<f64>
+// CHECK:             %[[INSERT_VALUE_1:.*]] = fir.insert_value %[[INSERT_VALUE_0]], %[[CONSTANT_1]], [1 : index] : (complex<f64>, f64) -> complex<f64>
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[BOX_DIMS_0]]#1 step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[INSERT_VALUE_1]]) -> (complex<f64>) {
+// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:               %[[BOX_DIMS_2:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:               %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_1]]#0, %[[CONSTANT_0]] : index
+// CHECK:               %[[ADDI_0:.*]] = arith.addi %[[VAL_1]], %[[SUBI_0]] : index
+// CHECK:               %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_0]] : index
+// CHECK:               %[[ADDI_1:.*]] = arith.addi %[[VAL_0]], %[[SUBI_1]] : index
+// CHECK:               %[[DESIGNATE_0:.*]] = hlfir.designate %[[ARG0]] (%[[ADDI_0]], %[[ADDI_1]])  : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index, index) -> !fir.ref<complex<f64>>
+// CHECK:               %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<complex<f64>>
+// CHECK:               %[[MULC_0:.*]] = fir.mulc %[[VAL_2]], %[[LOAD_0]] : complex<f64>
+// CHECK:               fir.result %[[MULC_0]] : complex<f64>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : complex<f64>
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xcomplex<f64>>
+// CHECK:         }
+
+func.func @product_box_unknown_extent2(%arg0: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<?xcomplex<f64>> {
+  %cst = arith.constant 2 : i32
+  %res = hlfir.product %arg0 dim %cst : (!fir.box<!fir.array<?x3xcomplex<f64>>>, i32) -> !hlfir.expr<?xcomplex<f64>>
+  return %res : !hlfir.expr<?xcomplex<f64>>
+}
+// CHECK-LABEL:   func.func @product_box_unknown_extent2(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<?xcomplex<f64>> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f64
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[BOX_DIMS_0]]#1 : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xcomplex<f64>> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[UNDEFINED_0:.*]] = fir.undefined complex<f64>
+// CHECK:             %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_0]], %[[CONSTANT_1]], [0 : index] : (complex<f64>, f64) -> complex<f64>
+// CHECK:             %[[INSERT_VALUE_1:.*]] = fir.insert_value %[[INSERT_VALUE_0]], %[[CONSTANT_1]], [1 : index] : (complex<f64>, f64) -> complex<f64>
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_2]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[INSERT_VALUE_1]]) -> (complex<f64>) {
+// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:               %[[BOX_DIMS_2:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:               %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_1]]#0, %[[CONSTANT_0]] : index
+// CHECK:               %[[ADDI_0:.*]] = arith.addi %[[VAL_0]], %[[SUBI_0]] : index
+// CHECK:               %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_0]] : index
+// CHECK:               %[[ADDI_1:.*]] = arith.addi %[[VAL_1]], %[[SUBI_1]] : index
+// CHECK:               %[[DESIGNATE_0:.*]] = hlfir.designate %[[ARG0]] (%[[ADDI_0]], %[[ADDI_1]])  : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index, index) -> !fir.ref<complex<f64>>
+// CHECK:               %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<complex<f64>>
+// CHECK:               %[[MULC_0:.*]] = fir.mulc %[[VAL_2]], %[[LOAD_0]] : complex<f64>
+// CHECK:               fir.result %[[MULC_0]] : complex<f64>
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : complex<f64>
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<?xcomplex<f64>>
+// CHECK:         }
+
+// expr with unknown extent
+func.func @product_expr_unknown_extent1(%arg0: !hlfir.expr<?x3xf32>) -> !hlfir.expr<3xf32> {
+  %cst = arith.constant 1 : i32
+  %res = hlfir.product %arg0 dim %cst : (!hlfir.expr<?x3xf32>, i32) -> !hlfir.expr<3xf32>
+  return %res : !hlfir.expr<3xf32>
+}
+// CHECK-LABEL:   func.func @product_expr_unknown_extent1(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<?x3xf32>) -> !hlfir.expr<3xf32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_OF_0:.*]] = hlfir.shape_of %[[ARG0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
+// CHECK:           %[[GET_EXTENT_0:.*]] = hlfir.get_extent %[[SHAPE_OF_0]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[GET_EXTENT_0]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[CONSTANT_1]]) -> (f32) {
+// CHECK:               %[[APPLY_0:.*]] = hlfir.apply %[[ARG0]], %[[VAL_1]], %[[VAL_0]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
+// CHECK:               %[[MULF_0:.*]] = arith.mulf %[[VAL_2]], %[[APPLY_0]] : f32
+// CHECK:               fir.result %[[MULF_0]] : f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : f32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xf32>
+// CHECK:         }
+
+func.func @product_expr_unknown_extent2(%arg0: !hlfir.expr<?x3xf32>) -> !hlfir.expr<?xf32> {
+  %cst = arith.constant 2 : i32
+  %res = hlfir.product %arg0 dim %cst : (!hlfir.expr<?x3xf32>, i32) -> !hlfir.expr<?xf32>
+  return %res : !hlfir.expr<?xf32>
+}
+// CHECK-LABEL:   func.func @product_expr_unknown_extent2(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<?x3xf32>) -> !hlfir.expr<?xf32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_OF_0:.*]] = hlfir.shape_of %[[ARG0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
+// CHECK:           %[[GET_EXTENT_0:.*]] = hlfir.get_extent %[[SHAPE_OF_0]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[GET_EXTENT_0]] : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_2]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[CONSTANT_1]]) -> (f32) {
+// CHECK:               %[[APPLY_0:.*]] = hlfir.apply %[[ARG0]], %[[VAL_0]], %[[VAL_1]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
+// CHECK:               %[[MULF_0:.*]] = arith.mulf %[[VAL_2]], %[[APPLY_0]] : f32
+// CHECK:               fir.result %[[MULF_0]] : f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : f32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<?xf32>
+// CHECK:         }
+
+// scalar mask
+func.func @product_scalar_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.ref<!fir.logical<1>>) -> !hlfir.expr<3xf32> {
+  %cst = arith.constant 1 : i32
+  %res = hlfir.product %arg0 dim %cst mask %mask : (!hlfir.expr<?x3xf32>, i32, !fir.ref<!fir.logical<1>>) -> !hlfir.expr<3xf32>
+  return %res : !hlfir.expr<3xf32>
+}
+// CHECK-LABEL:   func.func @product_scalar_mask(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<?x3xf32>,
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.logical<1>>) -> !hlfir.expr<3xf32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_OF_0:.*]] = hlfir.shape_of %[[ARG0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
+// CHECK:           %[[GET_EXTENT_0:.*]] = hlfir.get_extent %[[SHAPE_OF_0]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[LOAD_0:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<1>>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[GET_EXTENT_0]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[CONSTANT_1]]) -> (f32) {
+// CHECK:               %[[CONVERT_0:.*]] = fir.convert %[[LOAD_0]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[IF_0:.*]] = fir.if %[[CONVERT_0]] -> (f32) {
+// CHECK:                 %[[APPLY_0:.*]] = hlfir.apply %[[ARG0]], %[[VAL_1]], %[[VAL_0]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
+// CHECK:                 %[[MULF_0:.*]] = arith.mulf %[[VAL_2]], %[[APPLY_0]] : f32
+// CHECK:                 fir.result %[[MULF_0]] : f32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_2]] : f32
+// CHECK:               }
+// CHECK:               fir.result %[[IF_0]] : f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : f32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xf32>
+// CHECK:         }
+
+// scalar boxed mask
+func.func @product_scalar_boxed_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.box<!fir.logical<1>>) -> !hlfir.expr<3xf32> {
+  %cst = arith.constant 1 : i32
+  %res = hlfir.product %arg0 dim %cst mask %mask : (!hlfir.expr<?x3xf32>, i32, !fir.box<!fir.logical<1>>) -> !hlfir.expr<3xf32>
+  return %res : !hlfir.expr<3xf32>
+}
+// CHECK-LABEL:   func.func @product_scalar_boxed_mask(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<?x3xf32>,
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.box<!fir.logical<1>>) -> !hlfir.expr<3xf32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant true
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_OF_0:.*]] = hlfir.shape_of %[[ARG0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
+// CHECK:           %[[GET_EXTENT_0:.*]] = hlfir.get_extent %[[SHAPE_OF_0]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_3]] : (index) -> !fir.shape<1>
+// CHECK:           %[[IS_PRESENT_0:.*]] = fir.is_present %[[ARG1]] : (!fir.box<!fir.logical<1>>) -> i1
+// CHECK:           %[[IF_0:.*]] = fir.if %[[IS_PRESENT_0]] -> (!fir.logical<1>) {
+// CHECK:             %[[BOX_ADDR_0:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.logical<1>>) -> !fir.ref<!fir.logical<1>>
+// CHECK:             %[[LOAD_0:.*]] = fir.load %[[BOX_ADDR_0]] : !fir.ref<!fir.logical<1>>
+// CHECK:             fir.result %[[LOAD_0]] : !fir.logical<1>
+// CHECK:           } else {
+// CHECK:             %[[CONVERT_0:.*]] = fir.convert %[[CONSTANT_2]] : (i1) -> !fir.logical<1>
+// CHECK:             fir.result %[[CONVERT_0]] : !fir.logical<1>
+// CHECK:           }
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[GET_EXTENT_0]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[CONSTANT_1]]) -> (f32) {
+// CHECK:               %[[CONVERT_1:.*]] = fir.convert %[[IF_0]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[IF_1:.*]] = fir.if %[[CONVERT_1]] -> (f32) {
+// CHECK:                 %[[APPLY_0:.*]] = hlfir.apply %[[ARG0]], %[[VAL_1]], %[[VAL_0]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
+// CHECK:                 %[[MULF_0:.*]] = arith.mulf %[[VAL_2]], %[[APPLY_0]] : f32
+// CHECK:                 fir.result %[[MULF_0]] : f32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_2]] : f32
+// CHECK:               }
+// CHECK:               fir.result %[[IF_1]] : f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : f32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xf32>
+// CHECK:         }
+
+// array mask
+func.func @product_array_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.box<!fir.array<?x3x!fir.logical<1>>>) -> !hlfir.expr<?xf32> {
+  %cst = arith.constant 2 : i32
+  %res = hlfir.product %arg0 dim %cst mask %mask : (!hlfir.expr<?x3xf32>, i32, !fir.box<!fir.array<?x3x!fir.logical<1>>>) -> !hlfir.expr<?xf32>
+  return %res : !hlfir.expr<?xf32>
+}
+// CHECK-LABEL:   func.func @product_array_mask(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<?x3xf32>,
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.box<!fir.array<?x3x!fir.logical<1>>>) -> !hlfir.expr<?xf32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant true
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 0 : index
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[CONSTANT_4:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_OF_0:.*]] = hlfir.shape_of %[[ARG0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
+// CHECK:           %[[GET_EXTENT_0:.*]] = hlfir.get_extent %[[SHAPE_OF_0]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[GET_EXTENT_0]] : (index) -> !fir.shape<1>
+// CHECK:           %[[IS_PRESENT_0:.*]] = fir.is_present %[[ARG1]] : (!fir.box<!fir.array<?x3x!fir.logical<1>>>) -> i1
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_2]] to %[[CONSTANT_4]] step %[[CONSTANT_2]] iter_args(%[[VAL_2:.*]] = %[[CONSTANT_3]]) -> (f32) {
+// CHECK:               %[[IF_0:.*]] = fir.if %[[IS_PRESENT_0]] -> (!fir.logical<1>) {
+// CHECK:                 %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG1]], %[[CONSTANT_1]] : (!fir.box<!fir.array<?x3x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG1]], %[[CONSTANT_2]] : (!fir.box<!fir.array<?x3x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_0]]#0, %[[CONSTANT_2]] : index
+// CHECK:                 %[[ADDI_0:.*]] = arith.addi %[[VAL_0]], %[[SUBI_0]] : index
+// CHECK:                 %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_1]]#0, %[[CONSTANT_2]] : index
+// CHECK:                 %[[ADDI_1:.*]] = arith.addi %[[VAL_1]], %[[SUBI_1]] : index
+// CHECK:                 %[[DESIGNATE_0:.*]] = hlfir.designate %[[ARG1]] (%[[ADDI_0]], %[[ADDI_1]])  : (!fir.box<!fir.array<?x3x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:                 %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<!fir.logical<1>>
+// CHECK:                 fir.result %[[LOAD_0]] : !fir.logical<1>
+// CHECK:               } else {
+// CHECK:                 %[[CONVERT_0:.*]] = fir.convert %[[CONSTANT_0]] : (i1) -> !fir.logical<1>
+// CHECK:                 fir.result %[[CONVERT_0]] : !fir.logical<1>
+// CHECK:               }
+// CHECK:               %[[CONVERT_1:.*]] = fir.convert %[[IF_0]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[IF_1:.*]] = fir.if %[[CONVERT_1]] -> (f32) {
+// CHECK:                 %[[APPLY_0:.*]] = hlfir.apply %[[ARG0]], %[[VAL_0]], %[[VAL_1]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
+// CHECK:                 %[[MULF_0:.*]] = arith.mulf %[[VAL_2]], %[[APPLY_0]] : f32
+// CHECK:                 fir.result %[[MULF_0]] : f32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_2]] : f32
+// CHECK:               }
+// CHECK:               fir.result %[[IF_1]] : f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : f32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<?xf32>
+// CHECK:         }
+
+// array expr mask
+func.func @product_array_expr_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !hlfir.expr<?x3x!fir.logical<1>>) -> !hlfir.expr<?xf32> {
+  %cst = arith.constant 2 : i32
+  %res = hlfir.product %arg0 dim %cst mask %mask : (!hlfir.expr<?x3xf32>, i32, !hlfir.expr<?x3x!fir.logical<1>>) -> !hlfir.expr<?xf32>
+  return %res : !hlfir.expr<?xf32>
+}
+// CHECK-LABEL:   func.func @product_array_expr_mask(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<?x3xf32>,
+// CHECK-SAME:      %[[ARG1:.*]]: !hlfir.expr<?x3x!fir.logical<1>>) -> !hlfir.expr<?xf32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_OF_0:.*]] = hlfir.shape_of %[[ARG0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
+// CHECK:           %[[GET_EXTENT_0:.*]] = hlfir.get_extent %[[SHAPE_OF_0]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[GET_EXTENT_0]] : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_2]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[CONSTANT_1]]) -> (f32) {
+// CHECK:               %[[APPLY_0:.*]] = hlfir.apply %[[ARG1]], %[[VAL_0]], %[[VAL_1]] : (!hlfir.expr<?x3x!fir.logical<1>>, index, index) -> !fir.logical<1>
+// CHECK:               %[[CONVERT_0:.*]] = fir.convert %[[APPLY_0]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[IF_0:.*]] = fir.if %[[CONVERT_0]] -> (f32) {
+// CHECK:                 %[[APPLY_1:.*]] = hlfir.apply %[[ARG0]], %[[VAL_0]], %[[VAL_1]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
+// CHECK:                 %[[MULF_0:.*]] = arith.mulf %[[VAL_2]], %[[APPLY_1]] : f32
+// CHECK:                 fir.result %[[MULF_0]] : f32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_2]] : f32
+// CHECK:               }
+// CHECK:               fir.result %[[IF_0]] : f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : f32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<?xf32>
+// CHECK:         }
+
+// unordered floating point reduction
+func.func @product_unordered_reduction(%arg0: !hlfir.expr<2x3xf32>) -> !hlfir.expr<3xf32> {
+  %cst = arith.constant 1 : i32
+  %res = hlfir.product %arg0 dim %cst {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<2x3xf32>, i32) -> !hlfir.expr<3xf32>
+  return %res : !hlfir.expr<3xf32>
+}
+// CHECK-LABEL:   func.func @product_unordered_reduction(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<2x3xf32>) -> !hlfir.expr<3xf32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 3 : index
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_3]] : (index) -> !fir.shape<1>
+// CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
+// CHECK:           ^bb0(%[[VAL_0:.*]]: index):
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_2]] step %[[CONSTANT_0]] unordered iter_args(%[[VAL_2:.*]] = %[[CONSTANT_1]]) -> (f32) {
+// CHECK:               %[[APPLY_0:.*]] = hlfir.apply %[[ARG0]], %[[VAL_1]], %[[VAL_0]] : (!hlfir.expr<2x3xf32>, index, index) -> f32
+// CHECK:               %[[MULF_0:.*]] = arith.mulf %[[VAL_2]], %[[APPLY_0]] fastmath<reassoc> : f32
+// CHECK:               fir.result %[[MULF_0]] : f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[DO_LOOP_0]] : f32
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xf32>
+// CHECK:         }
+
+// total 1d reduction
+func.func @product_total_1d_reduction(%arg0: !fir.box<!fir.array<3xi32>>) -> i32 {
+  %cst = arith.constant 1 : i32
+  %res = hlfir.product %arg0 dim %cst : (!fir.box<!fir.array<3xi32>>, i32) -> i32
+  return %res : i32
+}
+// CHECK-LABEL:   func.func @product_total_1d_reduction(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<3xi32>>) -> i32 {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 0 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 3 : index
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_0:.*]] = %[[CONSTANT_3]] to %[[CONSTANT_1]] step %[[CONSTANT_3]] unordered iter_args(%[[VAL_1:.*]] = %[[CONSTANT_2]]) -> (i32) {
+// CHECK:             %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
+// CHECK:             %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_0]]#0, %[[CONSTANT_3]] : index
+// CHECK:             %[[ADDI_0:.*]] = arith.addi %[[VAL_0]], %[[SUBI_0]] : index
+// CHECK:             %[[DESIGNATE_0:.*]] = hlfir.designate %[[ARG0]] (%[[ADDI_0]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:             %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
+// CHECK:             %[[MULI_0:.*]] = arith.muli %[[VAL_1]], %[[LOAD_0]] : i32
+// CHECK:             fir.result %[[MULI_0]] : i32
+// CHECK:           }
+// CHECK:           return %[[DO_LOOP_0]] : i32
+// CHECK:         }
+
+// total 2d reduction
+func.func @product_total_2d_reduction(%arg0: !fir.box<!fir.array<?x3xi32>>) -> i32 {
+  %res = hlfir.product %arg0 : (!fir.box<!fir.array<?x3xi32>>) -> i32
+  return %res : i32
+}
+// CHECK-LABEL:   func.func @product_total_2d_reduction(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x3xi32>>) -> i32 {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xi32>>, index) -> (index, index, index)
+// CHECK:           %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_0:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_2]] step %[[CONSTANT_0]] unordered iter_args(%[[VAL_1:.*]] = %[[CONSTANT_1]]) -> (i32) {
+// CHECK:             %[[DO_LOOP_1:.*]] = fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_0]] to %[[BOX_DIMS_0]]#1 step %[[CONSTANT_0]] unordered iter_args(%[[VAL_3:.*]] = %[[VAL_1]]) -> (i32) {
+// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xi32>>, index) -> (index, index, index)
+// CHECK:               %[[BOX_DIMS_2:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<?x3xi32>>, index) -> (index, index, index)
+// CHECK:               %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_1]]#0, %[[CONSTANT_0]] : index
+// CHECK:               %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
+// CHECK:               %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_0]] : index
+// CHECK:               %[[ADDI_1:.*]] = arith.addi %[[VAL_0]], %[[SUBI_1]] : index
+// CHECK:               %[[DESIGNATE_0:.*]] = hlfir.designate %[[ARG0]] (%[[ADDI_0]], %[[ADDI_1]])  : (!fir.box<!fir.array<?x3xi32>>, index, index) -> !fir.ref<i32>
+// CHECK:               %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
+// CHECK:               %[[MULI_0:.*]] = arith.muli %[[VAL_3]], %[[LOAD_0]] : i32
+// CHECK:               fir.result %[[MULI_0]] : i32
+// CHECK:             }
+// CHECK:             fir.result %[[DO_LOOP_1]] : i32
+// CHECK:           }
+// CHECK:           return %[[DO_LOOP_0]] : i32
+// CHECK:         }
+
+// negative: invalid dim==0
+func.func @product_invalid_dim0(%arg0: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
+  %cst = arith.constant 0 : i32
+  %res = hlfir.product %arg0 dim %cst : (!hlfir.expr<2x3xi32>, i32) -> !hlfir.expr<3xi32>
+  return %res : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @product_invalid_dim0(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 0 : i32
+// CHECK:           %[[PRODUCT_0:.*]] = hlfir.product %[[ARG0]] dim %[[CONSTANT_0]] : (!hlfir.expr<2x3xi32>, i32) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[PRODUCT_0]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+// negative: invalid dim>rank
+func.func @product_invalid_dim_big(%arg0: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
+  %cst = arith.constant 3 : i32
+  %res = hlfir.product %arg0 dim %cst : (!hlfir.expr<2x3xi32>, i32) -> !hlfir.expr<3xi32>
+  return %res : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @product_invalid_dim_big(
+// CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 3 : i32
+// CHECK:           %[[PRODUCT_0:.*]] = hlfir.product %[[ARG0]] dim %[[CONSTANT_0]] : (!hlfir.expr<2x3xi32>, i32) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[PRODUCT_0]] : !hlfir.expr<3xi32>
+// CHECK:         }
\ No newline at end of file

>From 4c5f89e1837d091964aa89b36fb88473dbdf9187 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 26 Nov 2025 14:41:38 +0000
Subject: [PATCH 06/11] format

---
 flang/include/flang/Optimizer/Builder/FIRBuilder.h             | 2 +-
 flang/lib/Optimizer/Builder/FIRBuilder.cpp                     | 2 +-
 .../lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index bb85f56ffee24..48a72d73c03bd 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -864,7 +864,7 @@ mlir::Value createZeroValue(fir::FirOpBuilder &builder, mlir::Location loc,
 /// Create a one value of a given numerical or logical \p type (`true`
 /// for logical types).
 mlir::Value createOneValue(fir::FirOpBuilder &builder, mlir::Location loc,
-                            mlir::Type type);
+                           mlir::Type type);
 
 /// Get the integer constants of triplet and compute the extent.
 std::optional<std::int64_t> getExtentFromTriplet(mlir::Value lb, mlir::Value ub,
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index eac1994067c2e..490e9df8c3850 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1672,7 +1672,7 @@ mlir::Value fir::factory::createZeroValue(fir::FirOpBuilder &builder,
 }
 
 mlir::Value fir::factory::createOneValue(fir::FirOpBuilder &builder,
-                                          mlir::Location loc, mlir::Type type) {
+                                         mlir::Location loc, mlir::Type type) {
   mlir::Type i1 = builder.getIntegerType(1);
   if (mlir::isa<fir::LogicalType>(type) || type == i1)
     return builder.createConvert(loc, type, builder.createBool(loc, true));
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 15d24568ee136..3d68da9b8fa9b 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -947,8 +947,7 @@ class ProductAsElementalConverter
       [[maybe_unused]] mlir::ValueRange oneBasedIndices,
       [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
       final {
-    return {
-        fir::factory::createOneValue(builder, loc, getResultElementType())};
+    return {fir::factory::createOneValue(builder, loc, getResultElementType())};
   }
   virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,

>From 3db99137a8f2bd6e0ff5bcae52e9730887a21a55 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 26 Nov 2025 14:47:19 +0000
Subject: [PATCH 07/11] remove leftover comment

---
 .../lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 3d68da9b8fa9b..e958cf839c016 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -956,9 +956,6 @@ class ProductAsElementalConverter
     checkReductions(currentValue);
     hlfir::Entity elementValue =
         hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
-    // NOTE: we can use "Kahan summation" same way as the runtime
-    // (e.g. when fast-math is not allowed), but let's start with
-    // the simple version.
     return {genScalarMult(currentValue[0], elementValue)};
   }
 

>From a489b4b06fd56df5b815a375263bd14c3104535a Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 26 Nov 2025 15:10:11 +0000
Subject: [PATCH 08/11] correct docstrings

---
 flang/lib/Optimizer/Builder/FIRBuilder.cpp                      | 2 +-
 .../lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 490e9df8c3850..6857f0a2bb74f 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1686,7 +1686,7 @@ mlir::Value fir::factory::createOneValue(fir::FirOpBuilder &builder,
     mlir::Value onePart = builder.createRealOneConstant(loc, partType);
     return complexHelper.createComplex(type, onePart, onePart);
   }
-  fir::emitFatalError(loc, "internal: trying to generate zero value of non "
+  fir::emitFatalError(loc, "internal: trying to generate one value of non "
                            "numeric or logical type");
 }
 
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index e958cf839c016..9022bca2c6d47 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -959,7 +959,7 @@ class ProductAsElementalConverter
     return {genScalarMult(currentValue[0], elementValue)};
   }
 
-  // Generate scalar addition of the two values (of the same data type).
+  // Generate scalar multiplication of the two values (of the same data type).
   mlir::Value genScalarMult(mlir::Value value1, mlir::Value value2);
 };
 

>From c71fb78eb7581fafcbbb2fd4c653278fa50867ea Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 27 Nov 2025 15:05:54 +0000
Subject: [PATCH 09/11] resolving comments

---
 flang/lib/Optimizer/Builder/FIRBuilder.cpp    |  5 +--
 .../Transforms/SimplifyHLFIRIntrinsics.cpp    |  1 -
 .../simplify-hlfir-intrinsics-product.fir     | 31 ++++++++++---------
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 6857f0a2bb74f..003883e49a2f2 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1683,8 +1683,9 @@ mlir::Value fir::factory::createOneValue(fir::FirOpBuilder &builder,
   if (fir::isa_complex(type)) {
     fir::factory::Complex complexHelper(builder, loc);
     mlir::Type partType = complexHelper.getComplexPartType(type);
-    mlir::Value onePart = builder.createRealOneConstant(loc, partType);
-    return complexHelper.createComplex(type, onePart, onePart);
+    mlir::Value realPart = builder.createRealOneConstant(loc, partType);
+    mlir::Value imagPart = builder.createRealZeroConstant(loc, partType);
+    return complexHelper.createComplex(type, realPart, imagPart);
   }
   fir::emitFatalError(loc, "internal: trying to generate one value of non "
                            "numeric or logical type");
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 9022bca2c6d47..4fa8103687e02 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -23,7 +23,6 @@
 #include "mlir/IR/Location.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include <type_traits>
 
 namespace hlfir {
 #define GEN_PASS_DEF_SIMPLIFYHLFIRINTRINSICS
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
index f34f4f7faf2fa..aadba65760687 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
@@ -58,7 +58,6 @@ func.func @product_expr_known_extents(%arg0: !hlfir.expr<2x3xi32>) -> !hlfir.exp
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xi32>
 // CHECK:         }
 
-
 // box with unknown extent
 func.func @product_box_unknown_extent1(%arg0: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<3xcomplex<f64>> {
   %cst = arith.constant 1 : i32
@@ -68,18 +67,19 @@ func.func @product_box_unknown_extent1(%arg0: !fir.box<!fir.array<?x3xcomplex<f6
 // CHECK-LABEL:   func.func @product_box_unknown_extent1(
 // CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<3xcomplex<f64>> {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
-// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f64
-// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
-// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 0 : index
-// CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
-// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 1.000000e+00 : f64
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 3 : index
+// CHECK:           %[[CONSTANT_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_4]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_3]] : (index) -> !fir.shape<1>
 // CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xcomplex<f64>> {
 // CHECK:           ^bb0(%[[VAL_0:.*]]: index):
 // CHECK:             %[[UNDEFINED_0:.*]] = fir.undefined complex<f64>
-// CHECK:             %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_0]], %[[CONSTANT_1]], [0 : index] : (complex<f64>, f64) -> complex<f64>
+// CHECK:             %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_0]], %[[CONSTANT_2]], [0 : index] : (complex<f64>, f64) -> complex<f64>
 // CHECK:             %[[INSERT_VALUE_1:.*]] = fir.insert_value %[[INSERT_VALUE_0]], %[[CONSTANT_1]], [1 : index] : (complex<f64>, f64) -> complex<f64>
 // CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[BOX_DIMS_0]]#1 step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[INSERT_VALUE_1]]) -> (complex<f64>) {
-// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_4]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
 // CHECK:               %[[BOX_DIMS_2:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
 // CHECK:               %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_1]]#0, %[[CONSTANT_0]] : index
 // CHECK:               %[[ADDI_0:.*]] = arith.addi %[[VAL_1]], %[[SUBI_0]] : index
@@ -103,18 +103,19 @@ func.func @product_box_unknown_extent2(%arg0: !fir.box<!fir.array<?x3xcomplex<f6
 // CHECK-LABEL:   func.func @product_box_unknown_extent2(
 // CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<?xcomplex<f64>> {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
-// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1.000000e+00 : f64
-// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
-// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 0 : index
-// CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 1.000000e+00 : f64
+// CHECK:           %[[CONSTANT_3:.*]] = arith.constant 3 : index
+// CHECK:           %[[CONSTANT_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_4]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
 // CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[BOX_DIMS_0]]#1 : (index) -> !fir.shape<1>
 // CHECK:           %[[ELEMENTAL_0:.*]] = hlfir.elemental %[[SHAPE_0]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xcomplex<f64>> {
 // CHECK:           ^bb0(%[[VAL_0:.*]]: index):
 // CHECK:             %[[UNDEFINED_0:.*]] = fir.undefined complex<f64>
-// CHECK:             %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_0]], %[[CONSTANT_1]], [0 : index] : (complex<f64>, f64) -> complex<f64>
+// CHECK:             %[[INSERT_VALUE_0:.*]] = fir.insert_value %[[UNDEFINED_0]], %[[CONSTANT_2]], [0 : index] : (complex<f64>, f64) -> complex<f64>
 // CHECK:             %[[INSERT_VALUE_1:.*]] = fir.insert_value %[[INSERT_VALUE_0]], %[[CONSTANT_1]], [1 : index] : (complex<f64>, f64) -> complex<f64>
-// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_2]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[INSERT_VALUE_1]]) -> (complex<f64>) {
-// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
+// CHECK:             %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_1:.*]] = %[[CONSTANT_0]] to %[[CONSTANT_3]] step %[[CONSTANT_0]] iter_args(%[[VAL_2:.*]] = %[[INSERT_VALUE_1]]) -> (complex<f64>) {
+// CHECK:               %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_4]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
 // CHECK:               %[[BOX_DIMS_2:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
 // CHECK:               %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_1]]#0, %[[CONSTANT_0]] : index
 // CHECK:               %[[ADDI_0:.*]] = arith.addi %[[VAL_0]], %[[SUBI_0]] : index

>From 76ad596536e77cd8171abc7390ac6de2bdec79c3 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Tue, 2 Dec 2025 21:43:26 +0000
Subject: [PATCH 10/11] fix: integer starting val is 1

---
 flang/lib/Optimizer/Builder/FIRBuilder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 003883e49a2f2..c704ac79ae5f7 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1677,7 +1677,7 @@ mlir::Value fir::factory::createOneValue(fir::FirOpBuilder &builder,
   if (mlir::isa<fir::LogicalType>(type) || type == i1)
     return builder.createConvert(loc, type, builder.createBool(loc, true));
   if (fir::isa_integer(type))
-    return builder.createIntegerConstant(loc, type, 0);
+    return builder.createIntegerConstant(loc, type, 1);
   if (fir::isa_real(type))
     return builder.createRealOneConstant(loc, type);
   if (fir::isa_complex(type)) {

>From 0ea48bd94f7ddbd32595fd945695deb93917f127 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Tue, 2 Dec 2025 22:49:13 +0000
Subject: [PATCH 11/11] test: update tests

---
 .../HLFIR/simplify-hlfir-intrinsics-product.fir  | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
index aadba65760687..6d6b15f745b65 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-product.fir
@@ -10,7 +10,7 @@ func.func @product_box_known_extents(%arg0: !fir.box<!fir.array<2x3xi32>>) -> !h
 // CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<2x3xi32>>) -> !hlfir.expr<2xi32> {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 0 : index
 // CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : index
-// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 1 : i32
 // CHECK:           %[[CONSTANT_3:.*]] = arith.constant 2 : index
 // CHECK:           %[[CONSTANT_4:.*]] = arith.constant 3 : index
 // CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_3]] : (index) -> !fir.shape<1>
@@ -42,7 +42,7 @@ func.func @product_expr_known_extents(%arg0: !hlfir.expr<2x3xi32>) -> !hlfir.exp
 // CHECK-LABEL:   func.func @product_expr_known_extents(
 // CHECK-SAME:      %[[ARG0:.*]]: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
-// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : i32
 // CHECK:           %[[CONSTANT_2:.*]] = arith.constant 2 : index
 // CHECK:           %[[CONSTANT_3:.*]] = arith.constant 3 : index
 // CHECK:           %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_3]] : (index) -> !fir.shape<1>
@@ -58,6 +58,7 @@ func.func @product_expr_known_extents(%arg0: !hlfir.expr<2x3xi32>) -> !hlfir.exp
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xi32>
 // CHECK:         }
 
+
 // box with unknown extent
 func.func @product_box_unknown_extent1(%arg0: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<3xcomplex<f64>> {
   %cst = arith.constant 1 : i32
@@ -95,6 +96,7 @@ func.func @product_box_unknown_extent1(%arg0: !fir.box<!fir.array<?x3xcomplex<f6
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xcomplex<f64>>
 // CHECK:         }
 
+
 func.func @product_box_unknown_extent2(%arg0: !fir.box<!fir.array<?x3xcomplex<f64>>>) -> !hlfir.expr<?xcomplex<f64>> {
   %cst = arith.constant 2 : i32
   %res = hlfir.product %arg0 dim %cst : (!fir.box<!fir.array<?x3xcomplex<f64>>>, i32) -> !hlfir.expr<?xcomplex<f64>>
@@ -131,6 +133,7 @@ func.func @product_box_unknown_extent2(%arg0: !fir.box<!fir.array<?x3xcomplex<f6
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<?xcomplex<f64>>
 // CHECK:         }
 
+
 // expr with unknown extent
 func.func @product_expr_unknown_extent1(%arg0: !hlfir.expr<?x3xf32>) -> !hlfir.expr<3xf32> {
   %cst = arith.constant 1 : i32
@@ -157,6 +160,7 @@ func.func @product_expr_unknown_extent1(%arg0: !hlfir.expr<?x3xf32>) -> !hlfir.e
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xf32>
 // CHECK:         }
 
+
 func.func @product_expr_unknown_extent2(%arg0: !hlfir.expr<?x3xf32>) -> !hlfir.expr<?xf32> {
   %cst = arith.constant 2 : i32
   %res = hlfir.product %arg0 dim %cst : (!hlfir.expr<?x3xf32>, i32) -> !hlfir.expr<?xf32>
@@ -259,6 +263,7 @@ func.func @product_scalar_boxed_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.bo
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xf32>
 // CHECK:         }
 
+
 // array mask
 func.func @product_array_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.box<!fir.array<?x3x!fir.logical<1>>>) -> !hlfir.expr<?xf32> {
   %cst = arith.constant 2 : i32
@@ -309,6 +314,7 @@ func.func @product_array_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.box<!fir.
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<?xf32>
 // CHECK:         }
 
+
 // array expr mask
 func.func @product_array_expr_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !hlfir.expr<?x3x!fir.logical<1>>) -> !hlfir.expr<?xf32> {
   %cst = arith.constant 2 : i32
@@ -368,6 +374,7 @@ func.func @product_unordered_reduction(%arg0: !hlfir.expr<2x3xf32>) -> !hlfir.ex
 // CHECK:           return %[[ELEMENTAL_0]] : !hlfir.expr<3xf32>
 // CHECK:         }
 
+
 // total 1d reduction
 func.func @product_total_1d_reduction(%arg0: !fir.box<!fir.array<3xi32>>) -> i32 {
   %cst = arith.constant 1 : i32
@@ -378,7 +385,7 @@ func.func @product_total_1d_reduction(%arg0: !fir.box<!fir.array<3xi32>>) -> i32
 // CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<3xi32>>) -> i32 {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 0 : index
 // CHECK:           %[[CONSTANT_1:.*]] = arith.constant 3 : index
-// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_2:.*]] = arith.constant 1 : i32
 // CHECK:           %[[CONSTANT_3:.*]] = arith.constant 1 : index
 // CHECK:           %[[DO_LOOP_0:.*]] = fir.do_loop %[[VAL_0:.*]] = %[[CONSTANT_3]] to %[[CONSTANT_1]] step %[[CONSTANT_3]] unordered iter_args(%[[VAL_1:.*]] = %[[CONSTANT_2]]) -> (i32) {
 // CHECK:             %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_0]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
@@ -400,7 +407,7 @@ func.func @product_total_2d_reduction(%arg0: !fir.box<!fir.array<?x3xi32>>) -> i
 // CHECK-LABEL:   func.func @product_total_2d_reduction(
 // CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?x3xi32>>) -> i32 {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
-// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 0 : i32
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : i32
 // CHECK:           %[[CONSTANT_2:.*]] = arith.constant 3 : index
 // CHECK:           %[[CONSTANT_3:.*]] = arith.constant 0 : index
 // CHECK:           %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ARG0]], %[[CONSTANT_3]] : (!fir.box<!fir.array<?x3xi32>>, index) -> (index, index, index)
@@ -422,6 +429,7 @@ func.func @product_total_2d_reduction(%arg0: !fir.box<!fir.array<?x3xi32>>) -> i
 // CHECK:           return %[[DO_LOOP_0]] : i32
 // CHECK:         }
 
+
 // negative: invalid dim==0
 func.func @product_invalid_dim0(%arg0: !hlfir.expr<2x3xi32>) -> !hlfir.expr<3xi32> {
   %cst = arith.constant 0 : i32