[flang-commits] [flang] ff86ce6 - [flang][openacc] Add lowering support for 1d array reduction for add/mul operator

Thu Jun 22 13:09:54 PDT 2023

Author: Valentin Clement
Date: 2023-06-22T13:09:48-07:00
New Revision: ff86ce65f690f3680e2e21162524ba2c1894a18e

URL: https://github.com/llvm/llvm-project/commit/ff86ce65f690f3680e2e21162524ba2c1894a18e
DIFF: https://github.com/llvm/llvm-project/commit/ff86ce65f690f3680e2e21162524ba2c1894a18e.diff

LOG: [flang][openacc] Add lowering support for 1d array reduction for add/mul operator

Lower 1d array reduction for the add and mul operators. Multi-dimensional
arrays and other operators will follow.

Reviewed By: razvanlupusoru

Differential Revision: https://reviews.llvm.org/D153448

Added: 
    

Modified: 
    flang/lib/Lower/OpenACC.cpp
    flang/test/Lower/OpenACC/acc-reduction.f90
    mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
    mlir/test/Dialect/OpenACC/invalid.mlir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 63f848902ba31..bc5ab46aca726 100644

--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -535,6 +535,41 @@ getReductionOperator(const Fortran::parser::AccReductionOperator &op) {
   llvm_unreachable("unexpected reduction operator");
 }
 
+/// Get the correct DenseElementsAttr attribute for the given init value.
+/// The verifier on the DenseElementsAttr is strict about the init value passed
+/// to it so it must match the type.
+static mlir::DenseElementsAttr getDenseAttr(mlir::ShapedType shTy,
+                                            int64_t value) {
+  if (shTy.getElementType().isIntOrIndex()) {
+    if (auto intTy = mlir::dyn_cast<mlir::IntegerType>(shTy.getElementType())) {
+      if (intTy.getIntOrFloatBitWidth() == 8)
+        return mlir::DenseElementsAttr::get(shTy, static_cast<int8_t>(value));
+      if (intTy.getIntOrFloatBitWidth() == 16)
+        return mlir::DenseElementsAttr::get(shTy, static_cast<int16_t>(value));
+      if (intTy.getIntOrFloatBitWidth() == 32)
+        return mlir::DenseElementsAttr::get(shTy, static_cast<int32_t>(value));
+      if (intTy.getIntOrFloatBitWidth() == 64)
+        return mlir::DenseElementsAttr::get(shTy, value);
+    }
+  }
+
+  if (mlir::isa<mlir::FloatType>(shTy.getElementType())) {
+    if (auto intTy = mlir::dyn_cast<mlir::FloatType>(shTy.getElementType())) {
+      if (intTy.getIntOrFloatBitWidth() == 16)
+        return mlir::DenseElementsAttr::get(shTy, static_cast<float>(value));
+      if (intTy.getIntOrFloatBitWidth() == 32)
+        return mlir::DenseElementsAttr::get(shTy, static_cast<float>(value));
+      if (intTy.getIntOrFloatBitWidth() == 64)
+        return mlir::DenseElementsAttr::get(shTy, static_cast<double>(value));
+      if (intTy.getIntOrFloatBitWidth() == 128)
+        return mlir::DenseElementsAttr::get(shTy,
+                                            static_cast<long double>(value));
+    }
+  }
+
+  llvm_unreachable("unsupported dense attribute type");
+}
+
 static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder,
                                          mlir::Location loc, mlir::Type ty,
                                          mlir::acc::ReductionOperator op) {
@@ -584,6 +619,15 @@ static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder,
     if (mlir::isa<mlir::FloatType>(ty))
       return builder.create<mlir::arith::ConstantOp>(
           loc, ty, builder.getFloatAttr(ty, initValue));
+    if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
+      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
+        mlir::Type vecType =
+            mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy());
+        mlir::DenseElementsAttr denseAttr =
+            getDenseAttr(vecType.cast<mlir::ShapedType>(), initValue);
+        return builder.create<mlir::arith::ConstantOp>(loc, vecType, denseAttr);
+      }
+    }
   }
 
   TODO(loc, "reduction type");
@@ -592,6 +636,39 @@ static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder,
 static mlir::Value genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
                                mlir::acc::ReductionOperator op, mlir::Type ty,
                                mlir::Value value1, mlir::Value value2) {
+
+  // Handle combiner on arrays.
+  if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
+    if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
+      if (seqTy.getShape().size() > 1)
+        TODO(loc, "OpenACC reduction on array with more than one dimension");
+      if (seqTy.hasDynamicExtents())
+        TODO(loc, "OpenACC reduction on array with dynamic extents");
+      mlir::Type idxTy = builder.getIndexType();
+      mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
+      auto lb = builder.create<mlir::arith::ConstantOp>(
+          loc, idxTy, builder.getIntegerAttr(idxTy, 0));
+      auto ub = builder.create<mlir::arith::ConstantOp>(
+          loc, idxTy, builder.getIntegerAttr(idxTy, seqTy.getShape()[0] - 1));
+      auto step = builder.create<mlir::arith::ConstantOp>(
+          loc, idxTy, builder.getIntegerAttr(idxTy, 1));
+      auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
+                                                /*unordered=*/false);
+      builder.setInsertionPointToStart(loop.getBody());
+      auto addr1 = builder.create<fir::CoordinateOp>(
+          loc, refTy, value1, mlir::ValueRange{loop.getInductionVar()});
+      auto addr2 = builder.create<fir::CoordinateOp>(
+          loc, refTy, value2, mlir::ValueRange{loop.getInductionVar()});
+      auto load1 = builder.create<fir::LoadOp>(loc, addr1);
+      auto load2 = builder.create<fir::LoadOp>(loc, addr2);
+      auto combined =
+          genCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2);
+      builder.create<fir::StoreOp>(loc, combined, addr1);
+      builder.setInsertionPointAfter(loop);
+      return value1;
+    }
+  }
+
   if (op == mlir::acc::ReductionOperator::AccAdd) {
     if (ty.isIntOrIndex())
       return builder.create<mlir::arith::AddIOp>(loc, value1, value2);
@@ -666,10 +743,16 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
         converter, builder, semanticsContext, stmtCtx, accObject,
         operandLocation, asFortran, bounds);
 
-    if (!fir::isa_trivial(fir::unwrapRefType(baseAddr.getType())))
+    mlir::Type reductionTy = fir::unwrapRefType(baseAddr.getType());
+    if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(reductionTy))
+      reductionTy = seqTy.getEleTy();
+
+    if (!fir::isa_trivial(reductionTy))
       TODO(operandLocation, "reduction with unsupported type");
 
     mlir::Type ty = fir::unwrapRefType(baseAddr.getType());
+    if (!fir::isa_trivial(ty))
+      ty = baseAddr.getType();
     std::string recipeName = fir::getTypeAsString(
         ty, converter.getKindMap(),
         ("reduction_" + stringifyReductionOperator(mlirOp)).str());

diff  --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 805cfaca36889..88fec63552b90 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -56,6 +56,26 @@
 ! CHECK:   acc.yield %[[COMBINED]] : f32
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <mul> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xi32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<1> : vector<100xi32>
+! CHECK:   acc.yield %[[CST]] : vector<100xi32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>>):
+! CHECK:   %[[LB:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+! CHECK:     %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV]] : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK:     %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV]] : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK:     %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
+! CHECK:     %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
+! CHECK:     %[[COMBINED:.*]] = arith.muli %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:     fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref<i32>
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xi32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_i32 : i32 reduction_operator <mul> init {
 ! CHECK: ^bb0(%{{.*}}: i32):
 ! CHECK:   %[[INIT:.*]] = arith.constant 1 : i32
@@ -66,6 +86,26 @@
 ! CHECK:   acc.yield %[[COMBINED]] : i32
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xf32 : !fir.ref<!fir.array<100xf32>> reduction_operator <add> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xf32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<100xf32>
+! CHECK:   acc.yield %[[CST]] : vector<100xf32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xf32>>):
+! CHECK:   %[[LB:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+! CHECK:   %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV]] : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK:   %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV]] : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
+! CHECK:   %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
+! CHECK:   %[[COMBINED:.*]] = arith.addf %[[LOAD1]], %[[LOAD2]] fastmath<contract> : f32
+! CHECK:   fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref<f32>
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xf32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_f32 : f32 reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: f32):
 ! CHECK:   %[[INIT:.*]] = arith.constant 0.000000e+00 : f32
@@ -76,6 +116,26 @@
 ! CHECK:   acc.yield %[[COMBINED]] : f32
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <add> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xi32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<0> : vector<100xi32>
+! CHECK:   acc.yield %[[CST]] : vector<100xi32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>>):
+! CHECK:   %[[LB:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+! CHECK:     %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV]] : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK:     %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV]] : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK:     %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
+! CHECK:     %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
+! CHECK:     %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:     fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref<i32>
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xi32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_i32 : i32 reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: i32):
 ! CHECK:   %[[INIT:.*]] = arith.constant 0 : i32
@@ -100,6 +160,20 @@ subroutine acc_reduction_add_int(a, b)
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<i32> {fir.bindc_name = "b"})
 ! CHECK:       acc.loop reduction(@reduction_add_i32 -> %[[B]] : !fir.ref<i32>)
 
+subroutine acc_reduction_add_int_array_1d(a, b)
+  integer :: a(100)
+  integer :: i, b(100)
+
+  !$acc loop reduction(+:b)
+  do i = 1, 100
+    b(i) = b(i) + a(i)
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_add_int_array_1d(
+! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "b"})
+! CHECK:       acc.loop reduction(@reduction_add_ref_100xi32 -> %[[B]] : !fir.ref<!fir.array<100xi32>>)
+
 subroutine acc_reduction_add_float(a, b)
   real :: a(100), b
   integer :: i
@@ -114,6 +188,20 @@ subroutine acc_reduction_add_float(a, b)
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<f32> {fir.bindc_name = "b"})
 ! CHECK:       acc.loop reduction(@reduction_add_f32 -> %[[B]] : !fir.ref<f32>)
 
+subroutine acc_reduction_add_float_array_1d(a, b)
+  real :: a(100), b(100)
+  integer :: i
+
+  !$acc loop reduction(+:b)
+  do i = 1, 100
+    b(i) = b(i) + a(i)
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_add_float_array_1d(
+! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "b"})
+! CHECK:       acc.loop reduction(@reduction_add_ref_100xf32 -> %[[B]] : !fir.ref<!fir.array<100xf32>>)
+
 subroutine acc_reduction_mul_int(a, b)
   integer :: a(100)
   integer :: i, b
@@ -128,6 +216,20 @@ subroutine acc_reduction_mul_int(a, b)
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<i32> {fir.bindc_name = "b"})
 ! CHECK:       acc.loop reduction(@reduction_mul_i32 -> %[[B]] : !fir.ref<i32>)
 
+subroutine acc_reduction_mul_int_array_1d(a, b)
+  integer :: a(100)
+  integer :: i, b(100)
+
+  !$acc loop reduction(*:b)
+  do i = 1, 100
+    b(i) = b(i) * a(i)
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_mul_int_array_1d(
+! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "b"})
+! CHECK:       acc.loop reduction(@reduction_mul_ref_100xi32 -> %[[B]] : !fir.ref<!fir.array<100xi32>>)
+
 subroutine acc_reduction_mul_float(a, b)
   real :: a(100), b
   integer :: i
@@ -142,6 +244,19 @@ subroutine acc_reduction_mul_float(a, b)
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<f32> {fir.bindc_name = "b"})
 ! CHECK:       acc.loop reduction(@reduction_mul_f32 -> %[[B]] : !fir.ref<f32>)
 
+subroutine acc_reduction_mul_float_array_1d(a, b)
+  real :: a(100), b(100)
+  integer :: i
+
+  !$acc loop reduction(*:b)
+  do i = 1, 100
+    b(i) = b(i) * a(i)
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_mul_float_array_1d(
+! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "b"})
+! CHECK:       acc.loop reduction(@reduction_mul_ref_100xf32 -> %[[B]] : !fir.ref<!fir.array<100xf32>>)
 
 subroutine acc_reduction_min_int(a, b)
   integer :: a(100)

diff  --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 48ae0c96f9f4e..d4052a0f947a5 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -436,7 +436,7 @@ LogicalResult acc::FirstprivateRecipeOp::verifyRegions() {
 LogicalResult acc::ReductionRecipeOp::verifyRegions() {
   if (failed(verifyInitLikeSingleArgRegion(*this, getInitRegion(), "reduction",
                                            "init", getType(),
-                                           /*verifyYield=*/true)))
+                                           /*verifyYield=*/false)))
     return failure();
 
   if (getCombinerRegion().empty())

diff  --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir
index 31e4b4bdf74da..a3d938658e0e0 100644
--- a/mlir/test/Dialect/OpenACC/invalid.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid.mlir
@@ -418,15 +418,6 @@ acc.reduction.recipe @reduction_i64 : i64 reduction_operator<add> init {
 
 // -----
 
-// expected-error@+1 {{expects init region to yield a value of the reduction type}}
-acc.reduction.recipe @reduction_i64 : i64 reduction_operator<add> init {
-^bb0(%0: i64):
-  %1 = arith.constant 0 : i32
-  acc.yield %1 : i32
-} combiner {}
-
-// -----
-
 // expected-error@+1 {{expects non-empty combiner region}}
 acc.reduction.recipe @reduction_i64 : i64 reduction_operator<add> init {
 ^bb0(%0: i64):