[flang-commits] [flang] 7d2d693 - [flang][openacc] Support array reduction for min in lowering

Mon Jun 26 09:20:15 PDT 2023

Author: Valentin Clement
Date: 2023-06-26T09:20:11-07:00
New Revision: 7d2d693cac998391ac1d0df1e98d655e21c24f53

URL: https://github.com/llvm/llvm-project/commit/7d2d693cac998391ac1d0df1e98d655e21c24f53
DIFF: https://github.com/llvm/llvm-project/commit/7d2d693cac998391ac1d0df1e98d655e21c24f53.diff

LOG: [flang][openacc] Support array reduction for min in lowering

Add lowering support for array reduction with the
min operator.

Depends on D153650

Reviewed By: jeanPerier

Differential Revision: https://reviews.llvm.org/D153661

Added: 
    

Modified: 
    flang/lib/Lower/OpenACC.cpp
    flang/test/Lower/OpenACC/acc-reduction.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index c206399e313d9..9b6dc8eccb1be 100644

--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -535,39 +535,40 @@ getReductionOperator(const Fortran::parser::AccReductionOperator &op) {
   llvm_unreachable("unexpected reduction operator");
 }
 
-/// Get the correct DenseElementsAttr attribute for the given init value.
-/// The verifier on the DenseElementsAttr is strict about the init value passed
-/// to it so it must matched the type.
-static mlir::DenseElementsAttr getDenseAttr(mlir::ShapedType shTy,
-                                            int64_t value) {
-  if (shTy.getElementType().isIntOrIndex()) {
-    if (auto intTy = mlir::dyn_cast<mlir::IntegerType>(shTy.getElementType())) {
-      if (intTy.getIntOrFloatBitWidth() == 8)
-        return mlir::DenseElementsAttr::get(shTy, static_cast<int8_t>(value));
-      if (intTy.getIntOrFloatBitWidth() == 16)
-        return mlir::DenseElementsAttr::get(shTy, static_cast<int16_t>(value));
-      if (intTy.getIntOrFloatBitWidth() == 32)
-        return mlir::DenseElementsAttr::get(shTy, static_cast<int32_t>(value));
-      if (intTy.getIntOrFloatBitWidth() == 64)
-        return mlir::DenseElementsAttr::get(shTy, value);
+/// Get the initial value for reduction operator.
+template <typename R>
+static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
+  if (op == mlir::acc::ReductionOperator::AccMin) {
+    // min init value -> largest
+    if constexpr (std::is_same_v<R, llvm::APInt>) {
+      assert(ty.isIntOrIndex() && "expect integer or index type");
+      return llvm::APInt::getSignedMaxValue(ty.getIntOrFloatBitWidth());
+    }
+    if constexpr (std::is_same_v<R, llvm::APFloat>) {
+      auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty);
+      assert(floatTy && "expect float type");
+      return llvm::APFloat::getLargest(floatTy.getFloatSemantics(),
+                                       /*negative=*/false);
+    }
+  } else {
+    // +, ior, ieor init value -> 0
+    // * init value -> 1
+    int64_t value = (op == mlir::acc::ReductionOperator::AccMul) ? 1 : 0;
+    if constexpr (std::is_same_v<R, llvm::APInt>) {
+      assert(ty.isIntOrIndex() && "expect integer or index type");
+      return llvm::APInt(ty.getIntOrFloatBitWidth(), value, true);
     }
-  }
 
-  if (mlir::isa<mlir::FloatType>(shTy.getElementType())) {
-    if (auto intTy = mlir::dyn_cast<mlir::FloatType>(shTy.getElementType())) {
-      if (intTy.getIntOrFloatBitWidth() == 16)
-        return mlir::DenseElementsAttr::get(shTy, static_cast<float>(value));
-      if (intTy.getIntOrFloatBitWidth() == 32)
-        return mlir::DenseElementsAttr::get(shTy, static_cast<float>(value));
-      if (intTy.getIntOrFloatBitWidth() == 64)
-        return mlir::DenseElementsAttr::get(shTy, static_cast<double>(value));
-      if (intTy.getIntOrFloatBitWidth() == 128)
-        return mlir::DenseElementsAttr::get(shTy,
-                                            static_cast<long double>(value));
+    if constexpr (std::is_same_v<R, llvm::APFloat>) {
+      assert(mlir::isa<mlir::FloatType>(ty) && "expect float type");
+      auto floatTy = mlir::dyn_cast<mlir::FloatType>(ty);
+      return llvm::APFloat(floatTy.getFloatSemantics(), value);
     }
-  }
 
-  llvm_unreachable("unsupported dense attribute type");
+    if constexpr (std::is_same_v<R, int64_t>)
+      return value;
+  }
+  llvm_unreachable("OpenACC reduction unsupported type");
 }
 
 static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder,
@@ -581,19 +582,34 @@ static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder,
 
   // min -> largest
   if (op == mlir::acc::ReductionOperator::AccMin) {
-    if (ty.isIntOrIndex()) {
-      unsigned bits = ty.getIntOrFloatBitWidth();
+    if (ty.isIntOrIndex())
       return builder.create<mlir::arith::ConstantOp>(
           loc, ty,
-          builder.getIntegerAttr(
-              ty, llvm::APInt::getSignedMaxValue(bits).getSExtValue()));
-    }
-    if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty)) {
-      const llvm::fltSemantics &sem = floatTy.getFloatSemantics();
+          builder.getIntegerAttr(ty,
+                                 getReductionInitValue<llvm::APInt>(op, ty)));
+    if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty))
       return builder.create<mlir::arith::ConstantOp>(
           loc, ty,
-          builder.getFloatAttr(
-              ty, llvm::APFloat::getLargest(sem, /*negative=*/false)));
+          builder.getFloatAttr(ty,
+                               getReductionInitValue<llvm::APFloat>(op, ty)));
+    if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
+      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
+        mlir::Type vecTy =
+            mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy());
+        auto shTy = vecTy.cast<mlir::ShapedType>();
+        if (seqTy.getEleTy().isIntOrIndex())
+          return builder.create<mlir::arith::ConstantOp>(
+              loc, vecTy,
+              mlir::DenseElementsAttr::get(
+                  shTy,
+                  getReductionInitValue<llvm::APInt>(op, seqTy.getEleTy())));
+        if (mlir::isa<mlir::FloatType>(seqTy.getEleTy()))
+          return builder.create<mlir::arith::ConstantOp>(
+              loc, vecTy,
+              mlir::DenseElementsAttr::get(
+                  shTy,
+                  getReductionInitValue<llvm::APFloat>(op, seqTy.getEleTy())));
+      }
     }
     // max -> least
   } else if (op == mlir::acc::ReductionOperator::AccMax) {
@@ -610,22 +626,31 @@ static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder,
               ty, llvm::APFloat::getSmallest(floatTy.getFloatSemantics(),
                                              /*negative=*/true)));
   } else {
-    // 0 for +, ior, ieor
-    // 1 for *
-    int64_t initValue = op == mlir::acc::ReductionOperator::AccMul ? 1 : 0;
     if (ty.isIntOrIndex())
       return builder.create<mlir::arith::ConstantOp>(
-          loc, ty, builder.getIntegerAttr(ty, initValue));
+          loc, ty,
+          builder.getIntegerAttr(ty, getReductionInitValue<int64_t>(op, ty)));
     if (mlir::isa<mlir::FloatType>(ty))
       return builder.create<mlir::arith::ConstantOp>(
-          loc, ty, builder.getFloatAttr(ty, initValue));
+          loc, ty,
+          builder.getFloatAttr(ty, getReductionInitValue<int64_t>(op, ty)));
     if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
       if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
-        mlir::Type vecType =
+        mlir::Type vecTy =
             mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy());
-        mlir::DenseElementsAttr denseAttr =
-            getDenseAttr(vecType.cast<mlir::ShapedType>(), initValue);
-        return builder.create<mlir::arith::ConstantOp>(loc, vecType, denseAttr);
+        auto shTy = vecTy.cast<mlir::ShapedType>();
+        if (seqTy.getEleTy().isIntOrIndex())
+          return builder.create<mlir::arith::ConstantOp>(
+              loc, vecTy,
+              mlir::DenseElementsAttr::get(
+                  shTy,
+                  getReductionInitValue<llvm::APInt>(op, seqTy.getEleTy())));
+        if (mlir::isa<mlir::FloatType>(seqTy.getEleTy()))
+          return builder.create<mlir::arith::ConstantOp>(
+              loc, vecTy,
+              mlir::DenseElementsAttr::get(
+                  shTy,
+                  getReductionInitValue<llvm::APFloat>(op, seqTy.getEleTy())));
       }
     }
   }

diff  --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 56e80ff0dce6d..0a06bcbc5ffc8 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -24,6 +24,32 @@
 ! CHECK:   acc.yield %[[SELECT]] : i32
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100x10xf32 : !fir.ref<!fir.array<100x10xf32>> reduction_operator <min> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100x10xf32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<3.40282347E+38> : vector<100x10xf32>
+! CHECK:   acc.yield %[[CST]] : vector<100x10xf32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
+! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
+! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
+! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
+! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
+! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
+! CHECK:       %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] : f32
+! CHECK:       %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD1]], %[[LOAD2]] : f32
+! CHECK:       fir.store %[[SELECT]] to %[[COORD1]] : !fir.ref<f32>
+! CHECK:     }
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100x10xf32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_min_f32 : f32 reduction_operator <min> init {
 ! CHECK: ^bb0(%{{.*}}: f32):
 ! CHECK:   %[[INIT:.*]] = arith.constant 3.40282347E+38 : f32
@@ -35,6 +61,27 @@
 ! CHECK:   acc.yield %[[SELECT]] : f32
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <min> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xi32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<2147483647> : vector<100xi32>
+! CHECK:   acc.yield %[[CST]] : vector<100xi32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>>):
+! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+! CHECK:     %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]] : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK:     %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]] : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK:     %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
+! CHECK:     %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
+! CHECK:     %[[CMP:.*]] = arith.cmpi slt, %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:     %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:     fir.store %[[SELECT]] to %[[COORD1]] : !fir.ref<i32>
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xi32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_min_i32 : i32 reduction_operator <min> init {
 ! CHECK: ^bb0(%arg0: i32):
 ! CHECK:   %[[INIT:.*]] = arith.constant 2147483647 : i32
@@ -374,6 +421,22 @@ subroutine acc_reduction_min_int(a, b)
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"} 
 ! CHECK:       acc.loop reduction(@reduction_min_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
+subroutine acc_reduction_min_int_array_1d(a, b)
+  integer :: a(100), b(100)
+  integer :: i
+
+  !$acc loop reduction(min:b)
+  do i = 1, 100
+    b(i) = min(b(i), a(i))
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_min_int_array_1d(
+! CHECK-SAME: %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "b"})
+! CHECK: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100xi32>>) bounds(%2) -> !fir.ref<!fir.array<100xi32>> {name = "b"} 
+! CHECK: acc.loop reduction(@reduction_min_ref_100xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100xi32>>)
+
+
 subroutine acc_reduction_min_float(a, b)
   real :: a(100), b
   integer :: i
@@ -389,6 +452,24 @@ subroutine acc_reduction_min_float(a, b)
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"} 
 ! CHECK:       acc.loop reduction(@reduction_min_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
+subroutine acc_reduction_min_float_array2d(a, b)
+  real :: a(100, 10), b(100, 10)
+  integer :: i, j
+
+  !$acc loop reduction(min:b) collapse(2)
+  do i = 1, 100
+    do j = 1, 10
+      b(i, j) = min(b(i, j), a(i, j))
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_min_float_array2d(
+! CHECK-SAME: %{{.*}}: !fir.ref<!fir.array<100x10xf32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>> {fir.bindc_name = "b"})
+! CHECK: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100x10xf32>>) bounds(%3, %5) -> !fir.ref<!fir.array<100x10xf32>> {name = "b"} 
+! CHECK: acc.loop reduction(@reduction_min_ref_100x10xf32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xf32>>)
+! CHECK: attributes {collapse = 2 : i64}
+
 subroutine acc_reduction_max_int(a, b)
   integer :: a(100)
   integer :: i, b