[flang-commits] [flang] [flang][openacc] Added acc::RecipeInterface for getting alloca insertion point. (PR #68464)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Fri Oct 6 19:59:06 PDT 2023


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/68464

Conversion of `hlfir.assign` operations inside OpenACC recipe operations
may result in `fir.alloca` insertion. FIRBuilder can only handle
alloca insertion inside FuncOps and outlineable OpenMP operations.
I added a simple interface for OpenACC recipe operations, all of whose
regions hold executable code, so that an alloca can always be inserted
into the entry block of any of those regions.

With our current approach, the OptimizedBufferization pass is supposed
to lower these `hlfir.assign` operations into loops, because there
should not be conflicts between lhs/rhs. The pass currently runs
only on FuncOp, which is why it does not optimize
`hlfir.assign` inside the recipes. I will fix it in a separate commit.

Since we run OptimizedBufferization only at optimization levels above O0,
these changes should still be useful at O0.

Note that the OpenACC codegen that applies the recipes should be aware
of potential alloca operations and produce appropriate stack clean-ups.


>From 67479855c9902d5e27a75c8d92c11bdde91f985e Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 6 Oct 2023 18:23:35 -0700
Subject: [PATCH] [flang][openacc] Added acc::RecipeInterface for getting
 alloca insertion point.

Conversion of `hlfir.assign` operations inside OpenACC recipe operations
may result in `fir.alloca` insertion. FIRBuilder can only handle
alloca insertion inside FuncOps and outlineable OpenMP operations.
I added a simple interface for OpenACC recipe operations, all of whose
regions hold executable code, so that an alloca can always be inserted
into the entry block of any of those regions.

With our current approach, the OptimizedBufferization pass is supposed
to lower these `hlfir.assign` operations into loops, because there
should not be conflicts between lhs/rhs. The pass currently runs
only on FuncOp, which is why it does not optimize
`hlfir.assign` inside the recipes. I will fix it in a separate commit.

Since we run OptimizedBufferization only at optimization levels above O0,
these changes should still be useful at O0.

Note that the OpenACC codegen that applies the recipes should be aware
of potential alloca operations and produce appropriate stack clean-ups.
---
 flang/include/flang/Optimizer/HLFIR/Passes.td |  3 ++
 flang/lib/Optimizer/Builder/FIRBuilder.cpp    | 15 ++++--
 .../convert-assign-inside-openacc-recipe.fir  | 51 +++++++++++++++++++
 .../mlir/Dialect/OpenACC/CMakeLists.txt       |  2 +
 mlir/include/mlir/Dialect/OpenACC/OpenACC.h   |  2 +
 .../mlir/Dialect/OpenACC/OpenACCInterfaces.h  | 20 ++++++++
 .../mlir/Dialect/OpenACC/OpenACCOps.td        |  7 +--
 .../Dialect/OpenACC/OpenACCOpsInterfaces.td   | 41 +++++++++++++++
 mlir/lib/Dialect/OpenACC/CMakeLists.txt       |  1 +
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       |  1 +
 10 files changed, 137 insertions(+), 6 deletions(-)
 create mode 100644 flang/test/HLFIR/convert-assign-inside-openacc-recipe.fir
 create mode 100644 mlir/include/mlir/Dialect/OpenACC/OpenACCInterfaces.h
 create mode 100644 mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td

diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index c6e503c3a2760e8..dae96b3f767ea17 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -13,6 +13,9 @@ include "mlir/Pass/PassBase.td"
 def ConvertHLFIRtoFIR : Pass<"convert-hlfir-to-fir", "::mlir::ModuleOp"> {
   let summary = "Lower High-Level FIR to FIR";
   let constructor = "hlfir::createConvertHLFIRtoFIRPass()";
+  let dependentDialects = [
+    "mlir::func::FuncDialect",
+  ];
 }
 
 def BufferizeHLFIR : Pass<"bufferize-hlfir", "::mlir::ModuleOp"> {
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index a14f3106a723291..d058186758a952b 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -18,6 +18,7 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringExtras.h"
@@ -200,9 +201,17 @@ mlir::Value fir::FirOpBuilder::allocateLocal(
 
 /// Get the block for adding Allocas.
 mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
-  auto iface =
-      getRegion().getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
-  return iface ? iface.getAllocaBlock() : getEntryBlock();
+  if (auto ompOutlineableIface =
+          getRegion()
+              .getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>()) {
+    return ompOutlineableIface.getAllocaBlock();
+  }
+  if (auto accRecipeIface =
+          getRegion().getParentOfType<mlir::acc::RecipeInterface>()) {
+    return accRecipeIface.getAllocaBlock(getRegion());
+  }
+
+  return getEntryBlock();
 }
 
 mlir::Value fir::FirOpBuilder::createTemporaryAlloc(
diff --git a/flang/test/HLFIR/convert-assign-inside-openacc-recipe.fir b/flang/test/HLFIR/convert-assign-inside-openacc-recipe.fir
new file mode 100644
index 000000000000000..5a272bb95cc27c8
--- /dev/null
+++ b/flang/test/HLFIR/convert-assign-inside-openacc-recipe.fir
@@ -0,0 +1,51 @@
+// Check that hlfir.assign codegen is able to insert fir.alloca's inside
+// the regions of the OpenACC recipe.
+// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s
+
+acc.reduction.recipe @reduction_add_box_heap_Uxi32 : !fir.box<!fir.heap<!fir.array<?xi32>>> reduction_operator <add> init {
+^bb0(%arg0: !fir.box<!fir.heap<!fir.array<?xi32>>>):
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %0:3 = fir.box_dims %arg0, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  %1 = fir.shape %0#1 : (index) -> !fir.shape<1>
+  %2 = fir.allocmem !fir.array<?xi32>, %0#1 {bindc_name = ".tmp", uniq_name = ""}
+  %3:2 = hlfir.declare %2(%1) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+  hlfir.assign %c0_i32 to %3#0 : i32, !fir.box<!fir.array<?xi32>>
+  acc.yield %3#0 : !fir.box<!fir.array<?xi32>>
+} combiner {
+^bb0(%arg0: !fir.box<!fir.heap<!fir.array<?xi32>>>, %arg1: !fir.box<!fir.heap<!fir.array<?xi32>>>, %arg2: index, %arg3: index, %arg4: index):
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = arith.subi %arg3, %arg2 : index
+  %1 = arith.addi %0, %c1 : index
+  %2 = arith.divsi %1, %arg4 : index
+  %3 = arith.cmpi sgt, %2, %c0 : index
+  %4 = arith.select %3, %2, %c0 : index
+  %5 = fir.shape %4 : (index) -> !fir.shape<1>
+  %6 = hlfir.designate %arg0 (%arg2:%arg3:%arg4)  shape %5 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  %7 = hlfir.designate %arg1 (%arg2:%arg3:%arg4)  shape %5 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  %8 = fir.allocmem !fir.array<?xi32>, %4 {bindc_name = ".tmp.array", uniq_name = ""}
+  %9:2 = hlfir.declare %8(%5) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+  %true = arith.constant true
+  %c1_0 = arith.constant 1 : index
+  fir.do_loop %arg5 = %c1_0 to %4 step %c1_0 unordered {
+    %13 = hlfir.designate %6 (%arg5)  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+    %14 = hlfir.designate %7 (%arg5)  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+    %15 = fir.load %13 : !fir.ref<i32>
+    %16 = fir.load %14 : !fir.ref<i32>
+    %17 = arith.addi %15, %16 : i32
+    %18 = hlfir.designate %9#0 (%arg5)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    hlfir.assign %17 to %18 temporary_lhs : i32, !fir.ref<i32>
+  }
+  %10 = fir.undefined tuple<!fir.box<!fir.array<?xi32>>, i1>
+  %11 = fir.insert_value %10, %true, [1 : index] : (tuple<!fir.box<!fir.array<?xi32>>, i1>, i1) -> tuple<!fir.box<!fir.array<?xi32>>, i1>
+  %12 = fir.insert_value %11, %9#0, [0 : index] : (tuple<!fir.box<!fir.array<?xi32>>, i1>, !fir.box<!fir.array<?xi32>>) -> tuple<!fir.box<!fir.array<?xi32>>, i1>
+  hlfir.assign %9#0 to %arg0 : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+  acc.yield %arg0 : !fir.box<!fir.heap<!fir.array<?xi32>>>
+}
+// CHECK-LABEL:   acc.reduction.recipe @reduction_add_box_heap_Uxi32 : !fir.box<!fir.heap<!fir.array<?xi32>>> reduction_operator <add> init {
+// CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.box<!fir.heap<!fir.array<?xi32>>>):
+// CHECK:           %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+// CHECK-LABEL:   } combiner {
+// CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.box<!fir.heap<!fir.array<?xi32>>>, %[[VAL_1:.*]]: !fir.box<!fir.heap<!fir.array<?xi32>>>, %[[VAL_2:.*]]: index, %[[VAL_3:.*]]: index, %[[VAL_4:.*]]: index):
+// CHECK:           %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
diff --git a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt
index 02e903da90d0975..9dee1280db3ec1a 100644
--- a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt
@@ -24,3 +24,5 @@ mlir_tablegen(OpenACCTypeInterfaces.h.inc -gen-type-interface-decls)
 mlir_tablegen(OpenACCTypeInterfaces.cpp.inc -gen-type-interface-defs)
 add_public_tablegen_target(MLIROpenACCTypeInterfacesIncGen)
 add_dependencies(mlir-headers MLIROpenACCTypeInterfacesIncGen)
+
+add_mlir_interface(OpenACCOpsInterfaces)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index ca5876fba674599..bc4680656d4cf62 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -33,6 +33,8 @@
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenACC/OpenACCOpsAttributes.h.inc"
 
+#include "mlir/Dialect/OpenACC/OpenACCInterfaces.h"
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/OpenACC/OpenACCOps.h.inc"
 
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCInterfaces.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCInterfaces.h
new file mode 100644
index 000000000000000..5ce094969728f46
--- /dev/null
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCInterfaces.h
@@ -0,0 +1,20 @@
+//===- OpenACCInterfaces.h - MLIR Interfaces for OpenACC --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares OpenACC Interface implementations for the OpenACC dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_OPENACC_OPENACCINTERFACES_H_
+#define MLIR_DIALECT_OPENACC_OPENACCINTERFACES_H_
+
+#include "mlir/IR/OpDefinition.h"
+
+#include "mlir/Dialect/OpenACC/OpenACCOpsInterfaces.h.inc"
+
+#endif // MLIR_DIALECT_OPENACC_OPENACCINTERFACES_H_
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 60156cc334c72ec..10018c9fc7e27e8 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -21,6 +21,7 @@ include "mlir/IR/OpBase.td"
 include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Dialect/OpenACC/OpenACCBase.td"
 include "mlir/Dialect/OpenACC/OpenACCOpsTypes.td"
+include "mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td"
 include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td"
 include "mlir/Dialect/OpenACCMPCommon/Interfaces/AtomicInterfaces.td"
 
@@ -519,7 +520,7 @@ def OpenACC_UpdateHostOp : OpenACC_DataExitOp<"update_host",
 //===----------------------------------------------------------------------===//
 
 def OpenACC_PrivateRecipeOp : OpenACC_Op<"private.recipe",
-    [IsolatedFromAbove, Symbol]> {
+    [IsolatedFromAbove, Symbol, RecipeInterface]> {
   let summary = "privatization recipe";
 
   let description = [{
@@ -576,7 +577,7 @@ def OpenACC_PrivateRecipeOp : OpenACC_Op<"private.recipe",
 //===----------------------------------------------------------------------===//
 
 def OpenACC_FirstprivateRecipeOp : OpenACC_Op<"firstprivate.recipe",
-    [IsolatedFromAbove, Symbol]> {
+    [IsolatedFromAbove, Symbol, RecipeInterface]> {
   let summary = "privatization recipe";
 
   let description = [{
@@ -642,7 +643,7 @@ def OpenACC_FirstprivateRecipeOp : OpenACC_Op<"firstprivate.recipe",
 //===----------------------------------------------------------------------===//
 
 def OpenACC_ReductionRecipeOp : OpenACC_Op<"reduction.recipe",
-    [IsolatedFromAbove, Symbol]> {
+    [IsolatedFromAbove, Symbol, RecipeInterface]> {
   let summary = "reduction recipe";
 
   let description = [{
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td
new file mode 100644
index 000000000000000..4c721a328bce7fd
--- /dev/null
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td
@@ -0,0 +1,41 @@
+//===-- OpenACCOpsInterfaces.td - OpenACC op interfaces ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the OpenACC Dialect interfaces definition file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPENACC_OPS_INTERFACES
+#define OPENACC_OPS_INTERFACES
+
+include "mlir/IR/OpBase.td"
+
+def RecipeInterface : OpInterface<"RecipeInterface"> {
+    let description = [{
+      OpenACC operations with one or more regions holding executable code.
+    }];
+    let cppNamespace = "::mlir::acc";
+    let methods = [
+      InterfaceMethod<
+        /*description=*/[{
+          For the given region of the operation return the block
+          inside the region, where an alloca-like operation should be inserted.
+          The default implementation returns the entry block of the region.
+        }],
+        /*retTy*/"::mlir::Block *",
+        /*methodName=*/"getAllocaBlock",
+        /*args=*/(ins "::mlir::Region &":$region),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          return &region.front();
+        }]
+      >,
+    ];
+}
+
+#endif // OPENACC_OPS_INTERFACES
diff --git a/mlir/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/CMakeLists.txt
index 49a1e216b9381f3..27285246ef997c6 100644
--- a/mlir/lib/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/CMakeLists.txt
@@ -8,6 +8,7 @@ add_mlir_dialect_library(MLIROpenACCDialect
   MLIROpenACCOpsIncGen
   MLIROpenACCEnumsIncGen
   MLIROpenACCAttributesIncGen
+  MLIROpenACCOpsInterfacesIncGen
   MLIROpenACCTypeInterfacesIncGen
 
   LINK_LIBS PUBLIC
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 4cb758623093b7c..98dd53f41ffed6b 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -23,6 +23,7 @@ using namespace acc;
 
 #include "mlir/Dialect/OpenACC/OpenACCOpsDialect.cpp.inc"
 #include "mlir/Dialect/OpenACC/OpenACCOpsEnums.cpp.inc"
+#include "mlir/Dialect/OpenACC/OpenACCOpsInterfaces.cpp.inc"
 #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.cpp.inc"
 
 namespace {



More information about the flang-commits mailing list