[Mlir-commits] [mlir] de72243 - [openmp][mlir] Parallel reduction LLVM IR generation

Ethan Luis McDonough llvmlistbot at llvm.org
Tue Aug 15 22:58:56 PDT 2023


Author: Ethan Luis McDonough
Date: 2023-08-16T00:58:49-05:00
New Revision: de7224399acdf2c7e9d452a1e29e63b8ff92887c

URL: https://github.com/llvm/llvm-project/commit/de7224399acdf2c7e9d452a1e29e63b8ff92887c
DIFF: https://github.com/llvm/llvm-project/commit/de7224399acdf2c7e9d452a1e29e63b8ff92887c.diff

LOG: [openmp][mlir] Parallel reduction LLVM IR generation

This patch extends the existing WsLoop reduction IR generation to parallel blocks.

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D155157

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
    mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
    mlir/test/Target/LLVMIR/openmp-reduction.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index d096791490cce9..5bd26523593895 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -188,6 +188,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
   let builders = [
     OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>
   ];
+  let extraClassDeclaration = [{
+    /// Returns the number of reduction variables.
+    unsigned getNumReductionVars() { return getReductionVars().size(); }
+  }];
   let assemblyFormat = [{
     oilist( `reduction` `(`
               custom<ReductionVarList>(

diff  --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7e955b91e19c5e..cae307000b12c2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -253,64 +253,6 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
   llvm_unreachable("Unknown ClauseProcBindKind kind");
 }
 
-/// Converts the OpenMP parallel operation to LLVM IR.
-static LogicalResult
-convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
-                   LLVM::ModuleTranslation &moduleTranslation) {
-  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
-  LogicalResult bodyGenStatus = success();
-
-  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
-    // Save the alloca insertion point on ModuleTranslation stack for use in
-    // nested regions.
-    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
-        moduleTranslation, allocaIP);
-
-    // ParallelOp has only one region associated with it.
-    builder.restoreIP(codeGenIP);
-    convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
-                        moduleTranslation, bodyGenStatus);
-  };
-
-  // TODO: Perform appropriate actions according to the data-sharing
-  // attribute (shared, private, firstprivate, ...) of variables.
-  // Currently defaults to shared.
-  auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
-                    llvm::Value &, llvm::Value &vPtr,
-                    llvm::Value *&replacementValue) -> InsertPointTy {
-    replacementValue = &vPtr;
-
-    return codeGenIP;
-  };
-
-  // TODO: Perform finalization actions for variables. This has to be
-  // called for variables which have destructors/finalizers.
-  auto finiCB = [&](InsertPointTy codeGenIP) {};
-
-  llvm::Value *ifCond = nullptr;
-  if (auto ifExprVar = opInst.getIfExprVar())
-    ifCond = moduleTranslation.lookupValue(ifExprVar);
-  llvm::Value *numThreads = nullptr;
-  if (auto numThreadsVar = opInst.getNumThreadsVar())
-    numThreads = moduleTranslation.lookupValue(numThreadsVar);
-  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
-  if (auto bind = opInst.getProcBindVal())
-    pbKind = getProcBindKind(*bind);
-  // TODO: Is the Parallel construct cancellable?
-  bool isCancellable = false;
-
-  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
-      findAllocaInsertPoint(builder, moduleTranslation);
-  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(
-      ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind,
-      isCancellable));
-
-  return bodyGenStatus;
-}
-
 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -383,26 +325,56 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
 
 /// Returns a reduction declaration that corresponds to the given reduction
 /// operation in the given container. Currently only supports reductions inside
-/// WsLoopOp but can be easily extended.
-static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
-                                                 omp::ReductionOp reduction) {
-  SymbolRefAttr reductionSymbol;
+/// WsLoopOp and ParallelOp but can be easily extended as long as the given
+/// construct implements getNumReductionVars.
+template <typename T>
+static std::optional<omp::ReductionDeclareOp>
+findReductionDeclInContainer(T container, omp::ReductionOp reduction) {
   for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
     if (container.getReductionVars()[i] != reduction.getAccumulator())
       continue;
-    reductionSymbol = cast<SymbolRefAttr>((*container.getReductions())[i]);
-    break;
+
+    SymbolRefAttr reductionSymbol =
+        cast<SymbolRefAttr>((*container.getReductions())[i]);
+    auto declareOp =
+        SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
+            container, reductionSymbol);
+    return declareOp;
+  }
+  return std::nullopt;
+}
+
+/// Searches for a reduction in a provided region and the regions
+/// it is nested in
+static omp::ReductionDeclareOp findReductionDecl(Operation &containerOp,
+                                                 omp::ReductionOp reduction) {
+  std::optional<omp::ReductionDeclareOp> declareOp = std::nullopt;
+  Operation *container = &containerOp;
+
+  while (!declareOp.has_value() && container) {
+    // Check if current container is supported for reductions searches
+    if (auto par = dyn_cast<omp::ParallelOp>(*container)) {
+      declareOp = findReductionDeclInContainer(par, reduction);
+    } else if (auto loop = dyn_cast<omp::WsLoopOp>(*container)) {
+      declareOp = findReductionDeclInContainer(loop, reduction);
+    } else {
+      break;
+    }
+
+    // See if we can search parent for reductions as well
+    container = containerOp.getParentOp();
   }
-  assert(reductionSymbol &&
+
+  assert(declareOp.has_value() &&
          "reduction operation must be associated with a declaration");
 
-  return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
-      container, reductionSymbol);
+  return *declareOp;
 }
 
 /// Populates `reductions` with reduction declarations used in the given loop.
+template <typename T>
 static void
-collectReductionDecls(omp::WsLoopOp loop,
+collectReductionDecls(T loop,
                       SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
   std::optional<ArrayAttr> attr = loop.getReductions();
   if (!attr)
@@ -760,6 +732,62 @@ convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,
   return bodyGenStatus;
 }
 
+/// Allocate space for privatized reduction variables.
+template <typename T>
+static void
+allocReductionVars(T loop, llvm::IRBuilderBase &builder,
+                   LLVM::ModuleTranslation &moduleTranslation,
+                   llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+                   SmallVector<omp::ReductionDeclareOp> &reductionDecls,
+                   SmallVector<llvm::Value *> &privateReductionVariables,
+                   DenseMap<Value, llvm::Value *> &reductionVariableMap) {
+  unsigned numReductions = loop.getNumReductionVars();
+  privateReductionVariables.reserve(numReductions);
+  if (numReductions != 0) {
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.restoreIP(allocaIP);
+    for (unsigned i = 0; i < numReductions; ++i) {
+      llvm::Value *var = builder.CreateAlloca(
+          moduleTranslation.convertType(reductionDecls[i].getType()));
+      privateReductionVariables.push_back(var);
+      reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
+    }
+  }
+}
+
+/// Collect reduction info
+template <typename T>
+static void collectReductionInfo(
+    T loop, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation,
+    SmallVector<omp::ReductionDeclareOp> &reductionDecls,
+    SmallVector<OwningReductionGen> &owningReductionGens,
+    SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens,
+    const SmallVector<llvm::Value *> &privateReductionVariables,
+    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {
+  unsigned numReductions = loop.getNumReductionVars();
+
+  for (unsigned i = 0; i < numReductions; ++i) {
+    owningReductionGens.push_back(
+        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
+    owningAtomicReductionGens.push_back(
+        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
+  }
+
+  // Collect the reduction information.
+  reductionInfos.reserve(numReductions);
+  for (unsigned i = 0; i < numReductions; ++i) {
+    llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
+    if (owningAtomicReductionGens[i])
+      atomicGen = owningAtomicReductionGens[i];
+    llvm::Value *variable =
+        moduleTranslation.lookupValue(loop.getReductionVars()[i]);
+    reductionInfos.push_back(
+        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
+         privateReductionVariables[i], owningReductionGens[i], atomicGen});
+  }
+}
+
 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -788,21 +816,10 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
-  // Allocate space for privatized reduction variables.
   SmallVector<llvm::Value *> privateReductionVariables;
   DenseMap<Value, llvm::Value *> reductionVariableMap;
-  unsigned numReductions = loop.getNumReductionVars();
-  privateReductionVariables.reserve(numReductions);
-  if (numReductions != 0) {
-    llvm::IRBuilderBase::InsertPointGuard guard(builder);
-    builder.restoreIP(allocaIP);
-    for (unsigned i = 0; i < numReductions; ++i) {
-      llvm::Value *var = builder.CreateAlloca(
-          moduleTranslation.convertType(reductionDecls[i].getType()));
-      privateReductionVariables.push_back(var);
-      reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
-    }
-  }
+  allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls,
+                     privateReductionVariables, reductionVariableMap);
 
   // Store the mapping between reduction variables and their private copies on
   // ModuleTranslation stack. It can be then recovered when translating
@@ -813,7 +830,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   // Before the loop, store the initial values of reductions into reduction
   // variables. Although this could be done after allocas, we don't want to mess
   // up with the alloca insertion point.
-  for (unsigned i = 0; i < numReductions; ++i) {
+  for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) {
     SmallVector<llvm::Value *> phis;
     if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
                                        "omp.reduction.neutral", builder,
@@ -908,33 +925,17 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   builder.restoreIP(afterIP);
 
   // Process the reductions if required.
-  if (numReductions == 0)
+  if (loop.getNumReductionVars() == 0)
     return success();
 
   // Create the reduction generators. We need to own them here because
   // ReductionInfo only accepts references to the generators.
   SmallVector<OwningReductionGen> owningReductionGens;
   SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
-  for (unsigned i = 0; i < numReductions; ++i) {
-    owningReductionGens.push_back(
-        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
-    owningAtomicReductionGens.push_back(
-        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
-  }
-
-  // Collect the reduction information.
   SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
-  reductionInfos.reserve(numReductions);
-  for (unsigned i = 0; i < numReductions; ++i) {
-    llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
-    if (owningAtomicReductionGens[i])
-      atomicGen = owningAtomicReductionGens[i];
-    llvm::Value *variable =
-        moduleTranslation.lookupValue(loop.getReductionVars()[i]);
-    reductionInfos.push_back(
-        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
-         privateReductionVariables[i], owningReductionGens[i], atomicGen});
-  }
+  collectReductionInfo(loop, builder, moduleTranslation, reductionDecls,
+                       owningReductionGens, owningAtomicReductionGens,
+                       privateReductionVariables, reductionInfos);
 
   // The call to createReductions below expects the block to have a
   // terminator. Create an unreachable instruction to serve as terminator
@@ -954,6 +955,128 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Converts the OpenMP parallel operation to LLVM IR.
+static LogicalResult
+convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
+                   LLVM::ModuleTranslation &moduleTranslation) {
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
+  // relying on captured variables.
+  LogicalResult bodyGenStatus = success();
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+    // Collect reduction declarations
+    SmallVector<omp::ReductionDeclareOp> reductionDecls;
+    collectReductionDecls(opInst, reductionDecls);
+
+    // Allocate reduction vars
+    SmallVector<llvm::Value *> privateReductionVariables;
+    DenseMap<Value, llvm::Value *> reductionVariableMap;
+    allocReductionVars(opInst, builder, moduleTranslation, allocaIP,
+                       reductionDecls, privateReductionVariables,
+                       reductionVariableMap);
+
+    // Store the mapping between reduction variables and their private copies on
+    // ModuleTranslation stack. It can be then recovered when translating
+    // omp.reduce operations in a separate call.
+    LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+        moduleTranslation, reductionVariableMap);
+
+    // Initialize reduction vars
+    builder.restoreIP(allocaIP);
+    for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
+      SmallVector<llvm::Value *> phis;
+      if (failed(inlineConvertOmpRegions(
+              reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
+              builder, moduleTranslation, &phis)))
+        bodyGenStatus = failure();
+      assert(phis.size() == 1 &&
+             "expected one value to be yielded from the "
+             "reduction neutral element declaration region");
+      builder.restoreIP(allocaIP);
+      builder.CreateStore(phis[0], privateReductionVariables[i]);
+    }
+
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // ParallelOp has only one region associated with it.
+    builder.restoreIP(codeGenIP);
+    auto regionBlock =
+        convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
+                            moduleTranslation, bodyGenStatus);
+
+    // Process the reductions if required.
+    if (opInst.getNumReductionVars() > 0) {
+      // Collect reduction info
+      SmallVector<OwningReductionGen> owningReductionGens;
+      SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
+      SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
+      collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,
+                           owningReductionGens, owningAtomicReductionGens,
+                           privateReductionVariables, reductionInfos);
+
+      // Move to region cont block
+      builder.SetInsertPoint(regionBlock->getTerminator());
+
+      // Generate reductions from info
+      llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
+      builder.SetInsertPoint(tempTerminator);
+
+      llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
+          ompBuilder->createReductions(builder.saveIP(), allocaIP,
+                                       reductionInfos, false);
+      if (!contInsertPoint.getBlock()) {
+        bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
+        return;
+      }
+
+      tempTerminator->eraseFromParent();
+      builder.restoreIP(contInsertPoint);
+    }
+  };
+
+  // TODO: Perform appropriate actions according to the data-sharing
+  // attribute (shared, private, firstprivate, ...) of variables.
+  // Currently defaults to shared.
+  auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
+                    llvm::Value &, llvm::Value &vPtr,
+                    llvm::Value *&replacementValue) -> InsertPointTy {
+    replacementValue = &vPtr;
+
+    return codeGenIP;
+  };
+
+  // TODO: Perform finalization actions for variables. This has to be
+  // called for variables which have destructors/finalizers.
+  auto finiCB = [&](InsertPointTy codeGenIP) {};
+
+  llvm::Value *ifCond = nullptr;
+  if (auto ifExprVar = opInst.getIfExprVar())
+    ifCond = moduleTranslation.lookupValue(ifExprVar);
+  llvm::Value *numThreads = nullptr;
+  if (auto numThreadsVar = opInst.getNumThreadsVar())
+    numThreads = moduleTranslation.lookupValue(numThreadsVar);
+  auto pbKind = llvm::omp::OMP_PROC_BIND_default;
+  if (auto bind = opInst.getProcBindVal())
+    pbKind = getProcBindKind(*bind);
+  // TODO: Is the Parallel construct cancellable?
+  bool isCancellable = false;
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+
+  builder.restoreIP(
+      ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
+                                 ifCond, numThreads, pbKind, isCancellable));
+
+  return bodyGenStatus;
+}
+
 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -1286,15 +1409,20 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
 /// mapping between reduction variables and their private equivalents to have
 /// been stored on the ModuleTranslation stack. Currently only supports
-/// reduction within WsLoopOp, but can be easily extended.
+/// reduction within WsLoopOp and ParallelOp, but can be easily extended.
 static LogicalResult
 convertOmpReductionOp(omp::ReductionOp reductionOp,
                       llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
   // Find the declaration that corresponds to the reduction op.
-  auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
-  omp::ReductionDeclareOp declaration =
-      findReductionDecl(reductionContainer, reductionOp);
+  omp::ReductionDeclareOp declaration;
+  Operation *reductionParent = reductionOp->getParentOp();
+  if (dyn_cast<omp::ParallelOp>(reductionParent) ||
+      dyn_cast<omp::WsLoopOp>(reductionParent)) {
+    declaration = findReductionDecl(*reductionParent, reductionOp);
+  } else {
+    llvm_unreachable("Unhandled reduction container");
+  }
   assert(declaration && "could not find reduction declaration");
 
   // Retrieve the mapping between reduction variables and their private
@@ -1302,11 +1430,13 @@ convertOmpReductionOp(omp::ReductionOp reductionOp,
   const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
   moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
       [&](const OpenMPVarMappingStackFrame &frame) {
-        reductionVariableMap = &frame.mapping;
-        return WalkResult::interrupt();
+        if (frame.mapping.contains(reductionOp.getAccumulator())) {
+          reductionVariableMap = &frame.mapping;
+          return WalkResult::interrupt();
+        }
+        return WalkResult::advance();
       });
   assert(reductionVariableMap && "couldn't find private reduction variables");
-
   // Translate the reduction operation by emitting the body of the corresponding
   // reduction declaration.
   Region &reductionRegion = declaration.getReductionRegion();

diff  --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index c17f39febd83d8..93ab578df9e4e8 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -416,3 +416,146 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
 // CHECK: define internal void @[[REDFUNC]]
 // CHECK: fadd float
 // CHECK: fmul float
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+  %2 = llvm.load %arg3 : !llvm.ptr -> f32
+  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
+  omp.yield
+}
+
+// CHECK-LABEL: @simple_reduction_parallel
+llvm.func @simple_reduction_parallel() {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+  omp.parallel reduction(@add_f32 -> %0 : !llvm.ptr) {
+    %1 = llvm.mlir.constant(2.0 : f32) : f32
+    omp.reduction %1, %0 : f32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:.+]] = alloca float
+// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]
+
+// Update of the private variable
+// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
+// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction.
+// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
+// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
+
+// -----
+
+omp.reduction.declare @add_i32 : i32
+init {
+^bb0(%arg: i32):
+  %0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield (%0 : i32)
+}
+combiner {
+^bb1(%arg0: i32, %arg1: i32):
+  %1 = llvm.add %arg0, %arg1 : i32
+  omp.yield (%1 : i32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+  %2 = llvm.load %arg3 : !llvm.ptr -> i32
+  llvm.atomicrmw add %arg2, %2 monotonic : !llvm.ptr, i32
+  omp.yield
+}
+
+// CHECK-LABEL: @parallel_nested_workshare_reduction
+llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+
+  %lb = llvm.mlir.constant(1 : i64) : i64
+  %step = llvm.mlir.constant(1 : i64) : i64
+  
+  omp.parallel reduction(@add_i32 -> %0 : !llvm.ptr) {
+    omp.wsloop for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+      %ival = llvm.trunc %iv : i64 to i32
+      omp.reduction %ival, %0 : i32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
+  }
+
+  llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:[0-9]+]] = alloca i32
+// CHECK: store i32 0, ptr %[[PRIVATE]]
+
+// Loop exit:
+// CHECK: call void @__kmpc_barrier
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction:
+// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
+// CHECK: atomicrmw add ptr %{{.*}}, i32 %[[PARTIAL]]
+
+// Non-atomic reduction:
+// CHECK: add i32
+// CHECK: call void @__kmpc_end_reduce
+
+// Update of the private variable using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = add i32 %[[PARTIAL]], {{.*}}
+// CHECK: store i32 %[[UPDATED]], ptr %[[PRIVATE]]
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: add i32


        


More information about the Mlir-commits mailing list