[flang-commits] [flang] 9893b26 - [Flang][OpenMP] Add support for integer multiplication reduction in worksharing-loop

Tue Aug 9 12:31:48 PDT 2022

Author: Dylan Fleming
Date: 2022-08-09T19:22:18Z
New Revision: 9893b26dfa75545404ab9d079a7b2de696f8cd4c

URL: https://github.com/llvm/llvm-project/commit/9893b26dfa75545404ab9d079a7b2de696f8cd4c
DIFF: https://github.com/llvm/llvm-project/commit/9893b26dfa75545404ab9d079a7b2de696f8cd4c.diff

LOG: [Flang][OpenMP] Add support for integer multiplication reduction in worksharing-loop

Adds support for reduction of multiplcation
by extending OpenMP.cpp::genOpenMPReduction()
and altering the identity constant emitted in
OpenMP.cpp::createReductionDelc()

This patch builds D130077 and as such,
only supports reductions for interger types in
worksharping loops.

Reviewed By: awarzynski

Differential Revision: https://reviews.llvm.org/D130767

Added: 
    flang/test/Lower/OpenMP/wsloop-reduction-int-add.f90
    flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90

Modified: 
    flang/lib/Lower/OpenMP.cpp

Removed: 
    flang/test/Lower/OpenMP/Todo/reduction-multiply.f90
    flang/test/Lower/OpenMP/wsloop-reduction-int.f90


################################################################################
diff  --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index ea8b239c75a7d..24f88f07f7450 100644

--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -783,28 +783,50 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   }
 }
 
+/// This function returns the identity value of the operator \p reductionOpName.
+/// For example:
+///    0 + x = x,
+///    1 * x = x
+static int getOperationIdentity(llvm::StringRef reductionOpName,
+                                mlir::Location loc) {
+  if (reductionOpName.contains("add"))
+    return 0;
+  else if (reductionOpName.contains("multiply"))
+    return 1;
+  TODO(loc, "Reduction of some intrinsic operators is not supported");
+}
+
+static Value getReductionInitValue(mlir::Location loc, mlir::Type type,
+                                   llvm::StringRef reductionOpName,
+                                   fir::FirOpBuilder &builder) {
+  return builder.create<mlir::arith::ConstantOp>(
+      loc, type,
+      builder.getIntegerAttr(type, getOperationIdentity(reductionOpName, loc)));
+}
+
 /// Creates an OpenMP reduction declaration and inserts it into the provided
 /// symbol table. The declaration has a constant initializer with the neutral
 /// value `initValue`, and the reduction combiner carried over from `reduce`.
 /// TODO: Generalize this for non-integer types, add atomic region.
-static omp::ReductionDeclareOp createReductionDecl(fir::FirOpBuilder &builder,
-                                                   llvm::StringRef name,
-                                                   mlir::Type type,
-                                                   mlir::Location loc) {
+static omp::ReductionDeclareOp createReductionDecl(
+    fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
+    Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
+    mlir::Type type, mlir::Location loc) {
   OpBuilder::InsertionGuard guard(builder);
   mlir::ModuleOp module = builder.getModule();
   mlir::OpBuilder modBuilder(module.getBodyRegion());
-  auto decl = module.lookupSymbol<mlir::omp::ReductionDeclareOp>(name);
+  auto decl =
+      module.lookupSymbol<mlir::omp::ReductionDeclareOp>(reductionOpName);
   if (!decl)
-    decl = modBuilder.create<omp::ReductionDeclareOp>(loc, name, type);
+    decl =
+        modBuilder.create<omp::ReductionDeclareOp>(loc, reductionOpName, type);
   else
     return decl;
 
   builder.createBlock(&decl.initializerRegion(), decl.initializerRegion().end(),
                       {type}, {loc});
   builder.setInsertionPointToEnd(&decl.initializerRegion().back());
-  Value init = builder.create<mlir::arith::ConstantOp>(
-      loc, type, builder.getIntegerAttr(type, 0));
+  Value init = getReductionInitValue(loc, type, reductionOpName, builder);
   builder.create<omp::YieldOp>(loc, init);
 
   builder.createBlock(&decl.reductionRegion(), decl.reductionRegion().end(),
@@ -812,8 +834,20 @@ static omp::ReductionDeclareOp createReductionDecl(fir::FirOpBuilder &builder,
   builder.setInsertionPointToEnd(&decl.reductionRegion().back());
   mlir::Value op1 = decl.reductionRegion().front().getArgument(0);
   mlir::Value op2 = decl.reductionRegion().front().getArgument(1);
-  Value addRes = builder.create<mlir::arith::AddIOp>(loc, op1, op2);
-  builder.create<omp::YieldOp>(loc, addRes);
+
+  Value res;
+  switch (intrinsicOp) {
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+    res = builder.create<mlir::arith::AddIOp>(loc, op1, op2);
+    break;
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+    res = builder.create<mlir::arith::MulIOp>(loc, op1, op2);
+    break;
+  default:
+    TODO(loc, "Reduction of some intrinsic operators is not supported");
+  }
+
+  builder.create<omp::YieldOp>(loc, res);
   return decl;
 }
 
@@ -885,10 +919,18 @@ static std::string getReductionName(
     Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
     mlir::Type ty) {
   std::string reductionName;
-  if (intrinsicOp == Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
+
+  switch (intrinsicOp) {
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
     reductionName = "add_reduction";
-  else
+    break;
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+    reductionName = "multiply_reduction";
+    break;
+  default:
     reductionName = "other_reduction";
+    break;
+  }
 
   return (llvm::Twine(reductionName) +
           (ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
@@ -990,10 +1032,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
         const auto &intrinsicOp{
             std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
                 redDefinedOp->u)};
-        if (intrinsicOp !=
-            Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
+        switch (intrinsicOp) {
+        case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+        case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+          break;
+
+        default:
           TODO(currentLocation,
                "Reduction of some intrinsic operators is not supported");
+          break;
+        }
         for (const auto &ompObject : objectList.v) {
           if (const auto *name{
                   Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
@@ -1005,7 +1053,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
               if (redType.isIntOrIndex()) {
                 decl = createReductionDecl(
                     firOpBuilder, getReductionName(intrinsicOp, redType),
-                    redType, currentLocation);
+                    intrinsicOp, redType, currentLocation);
               } else {
                 TODO(currentLocation,
                      "Reduction of some types is not supported");
@@ -1604,8 +1652,8 @@ void Fortran::lower::genOpenMPDeclarativeConstruct(
 // Generate an OpenMP reduction operation. This implementation finds the chain :
 // load reduction var -> reduction_operation -> store reduction var and replaces
 // it with the reduction operation.
-// TODO: Currently assumes it is an integer addition reduction. Generalize this
-// for various reduction operation types.
+// TODO: Currently assumes it is an integer addition/multiplication reduction.
+// Generalize this for various reduction operation types.
 // TODO: Generate the reduction operation during lowering instead of creating
 // and removing operations since this is not a robust approach. Also, removing
 // ops in the builder (instead of a rewriter) is probably not the best approach.
@@ -1626,9 +1674,14 @@ void Fortran::lower::genOpenMPReduction(
         const auto &intrinsicOp{
             std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
                 reductionOp->u)};
-        if (intrinsicOp !=
-            Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
+
+        switch (intrinsicOp) {
+        case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+        case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+          break;
+        default:
           continue;
+        }
         for (const auto &ompObject : objectList.v) {
           if (const auto *name{
                   Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {

diff  --git a/flang/test/Lower/OpenMP/Todo/reduction-multiply.f90 b/flang/test/Lower/OpenMP/Todo/reduction-multiply.f90
deleted file mode 100644
index 3f7f2ef96f448..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-multiply.f90
+++ /dev/null
@@ -1,15 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction of some intrinsic operators is not supported
-subroutine reduction_multiply
-  integer :: x
-  !$omp parallel
-  !$omp do reduction(*:x)
-  do i=1, 100
-    x = x * i
-  end do
-  !$omp end do
-  !$omp end parallel
-  print *, x
-end subroutine

diff  --git a/flang/test/Lower/OpenMP/wsloop-reduction-int.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-int-add.f90
similarity index 100%
rename from flang/test/Lower/OpenMP/wsloop-reduction-int.f90
rename to flang/test/Lower/OpenMP/wsloop-reduction-int-add.f90

diff  --git a/flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90
new file mode 100644
index 0000000000000..74245d11acbd8
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90
@@ -0,0 +1,144 @@
+! RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init {
+!CHECK: ^bb0(%{{.*}}: i64):
+!CHECK:  %[[C1_1:.*]] = arith.constant 1 : i64
+!CHECK:  omp.yield(%[[C1_1]] : i64)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
+!CHECK:  %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i64
+!CHECK:  omp.yield(%[[RES]] : i64)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
+!CHECK: ^bb0(%{{.*}}: i32):
+!CHECK:  %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:  omp.yield(%[[C1_1]] : i32)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
+!CHECK:  %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32
+!CHECK:  omp.yield(%[[RES]] : i32)
+!CHECK: }
+
+!CHECK-LABEL: func.func @_QPsimple_reduction
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
+!CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+
+subroutine simple_reduction
+  integer :: x
+  x = 1
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+    x = x * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
+!CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+
+subroutine simple_reduction_switch_order
+  integer :: x
+  x = 1
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+  x = i * x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPmultiple_reductions_same_type
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_same_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_same_typeEy"}
+!CHECK:  %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_same_typeEz"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+
+subroutine multiple_reductions_same_type
+  integer :: x,y,z
+  x = 1
+  y = 1
+  z = 1
+  !$omp parallel
+  !$omp do reduction(*:x,y,z)
+  do i=1, 10
+  x = x * i
+  y = y * i
+  z = z * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPmultiple_reductions_
diff erent_type
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_
diff erent_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_
diff erent_typeEy"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>) for  (%[[IVAL:.*]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[C2_32:.*]] = arith.constant 2 : i32
+!CHECK:      omp.reduction %[[C2_32]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      %[[C2_64:.*]] = arith.constant 2 : i64
+!CHECK:      omp.reduction %[[C2_64]], %[[YREF]] : !fir.ref<i64>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+
+subroutine multiple_reductions_
diff erent_type
+  integer :: x
+  integer(kind=8) :: y
+  !$omp parallel
+  !$omp do reduction(*:x,y)
+  do i=1, 10
+    x = x * 2_4
+    y = y * 2_8
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine