[flang-commits] [clang] [flang] [flang][OpenMP] Add -f[no]-openmp-simd (PR #150269)

Thu Jul 24 03:01:08 PDT 2025

================
@@ -0,0 +1,360 @@
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Transforms/Utils.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include <llvm/Support/Debug.h>
+#include <mlir/IR/MLIRContext.h>
+#include <mlir/IR/Operation.h>
+#include <mlir/IR/PatternMatch.h>
+#include <mlir/Support/LLVM.h>
+
+namespace flangomp {
+#define GEN_PASS_DEF_SIMDONLYPASS
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+namespace {
+
+#define DEBUG_TYPE "omp-simd-only-pass"
+
+class SimdOnlyConversionPattern : public mlir::RewritePattern {
+public:
+  SimdOnlyConversionPattern(mlir::MLIRContext *ctx)
+      : mlir::RewritePattern(MatchAnyOpTypeTag{}, 1, ctx) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(mlir::Operation *op,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (op->getDialect()->getNamespace() !=
+        mlir::omp::OpenMPDialect::getDialectNamespace())
+      return rewriter.notifyMatchFailure(op, "Not an OpenMP op");
+
+    if (auto simdOp = mlir::dyn_cast<mlir::omp::SimdOp>(op)) {
+      // Remove the composite attr given that the op will no longer be composite
+      if (simdOp.isComposite()) {
+        simdOp.setComposite(false);
+        return mlir::success();
+      }
+
+      return rewriter.notifyMatchFailure(op, "Op is a plain SimdOp");
+    }
+
+    if (op->getParentOfType<mlir::omp::SimdOp>())
+      return rewriter.notifyMatchFailure(op, "Op is nested under a SimdOp");
+
+    if (!mlir::isa<mlir::func::FuncOp>(op->getParentOp()) &&
+        (mlir::isa<mlir::omp::TerminatorOp>(op) ||
+         mlir::isa<mlir::omp::YieldOp>(op)))
+      return rewriter.notifyMatchFailure(op,
+                                         "Non top-level yield or terminator");
+
+    // SectionOp overrides its BlockArgInterface based on the parent SectionsOp.
+    // We need to make sure we only rewrite omp.sections once all omp.section
+    // ops inside it have been rewritten, otherwise the individual omp.section
+    // ops will not be able to access their argument values.
+    if (auto sectionsOp = mlir::dyn_cast<mlir::omp::SectionsOp>(op)) {
+      for (auto &opInSections : sectionsOp.getRegion().getOps())
+        if (mlir::isa<mlir::omp::SectionOp>(opInSections))
+          return rewriter.notifyMatchFailure(
+              op, "SectionsOp still contains individual sections");
+    }
+
+    LLVM_DEBUG(llvm::dbgs() << "SimdOnlyPass matched OpenMP op:\n");
+    LLVM_DEBUG(op->dump());
+
+    // Erase ops that don't need any special handling
+    if (mlir::isa<mlir::omp::BarrierOp>(op) ||
+        mlir::isa<mlir::omp::FlushOp>(op) ||
+        mlir::isa<mlir::omp::TaskyieldOp>(op) ||
+        mlir::isa<mlir::omp::MapBoundsOp>(op) ||
+        mlir::isa<mlir::omp::TargetEnterDataOp>(op) ||
+        mlir::isa<mlir::omp::TargetExitDataOp>(op) ||
+        mlir::isa<mlir::omp::TargetUpdateOp>(op) ||
+        mlir::isa<mlir::omp::OrderedOp>(op) ||
+        mlir::isa<mlir::omp::CancelOp>(op) ||
+        mlir::isa<mlir::omp::CancellationPointOp>(op) ||
+        mlir::isa<mlir::omp::ScanOp>(op) ||
+        mlir::isa<mlir::omp::TaskwaitOp>(op)) {
+      rewriter.eraseOp(op);
+      return mlir::success();
+    }
+
+    fir::FirOpBuilder builder(rewriter, op);
+    mlir::Location loc = op->getLoc();
+
+    auto inlineSimpleOp = [&](mlir::Operation *ompOp) -> bool {
+      if (!ompOp)
+        return false;
+
+      llvm::SmallVector<std::pair<mlir::Value, mlir::BlockArgument>>
+          blockArgsPairs;
+      if (auto iface =
+              mlir::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(op)) {
+        iface.getBlockArgsPairs(blockArgsPairs);
+        for (auto [value, argument] : blockArgsPairs)
+          rewriter.replaceAllUsesWith(argument, value);
+      }
+
+      if (ompOp->getRegion(0).getBlocks().size() == 1) {
+        auto &block = *ompOp->getRegion(0).getBlocks().begin();
+        // This block is about to be removed so any arguments should have been
+        // replaced by now.
+        block.eraseArguments(0, block.getNumArguments());
+        if (auto terminatorOp =
+                mlir::dyn_cast<mlir::omp::TerminatorOp>(block.back())) {
+          rewriter.eraseOp(terminatorOp);
+        }
+        rewriter.inlineBlockBefore(&block, op, {});
+      } else {
+        // When dealing with multi-block regions we need to fix up the control
+        // flow
+        auto *origBlock = ompOp->getBlock();
+        auto *newBlock = rewriter.splitBlock(origBlock, ompOp->getIterator());
+        auto *innerFrontBlock = &ompOp->getRegion(0).getBlocks().front();
+        builder.setInsertionPointToEnd(origBlock);
+        builder.create<mlir::cf::BranchOp>(loc, innerFrontBlock);
+        // We are no longer passing any arguments to the first block in the
+        // region, so this should be safe to erase.
+        innerFrontBlock->eraseArguments(0, innerFrontBlock->getNumArguments());
+
+        for (auto &innerBlock : ompOp->getRegion(0).getBlocks()) {
+          // Remove now-unused block arguments
+          for (auto arg : innerBlock.getArguments()) {
+            if (arg.getUses().empty())
+              innerBlock.eraseArgument(arg.getArgNumber());
+          }
+          if (auto terminatorOp =
+                  mlir::dyn_cast<mlir::omp::TerminatorOp>(innerBlock.back())) {
+            builder.setInsertionPointToEnd(&innerBlock);
+            builder.create<mlir::cf::BranchOp>(loc, newBlock);
+            rewriter.eraseOp(terminatorOp);
+          }
+        }
+
+        rewriter.inlineRegionBefore(ompOp->getRegion(0), newBlock);
+      }
+
+      rewriter.eraseOp(op);
+      return true;
+    };
+
+    if (auto ompOp = mlir::dyn_cast<mlir::omp::LoopNestOp>(op)) {
+      mlir::Type indexType = builder.getIndexType();
+      mlir::Type oldIndexType = ompOp.getIVs().begin()->getType();
+      builder.setInsertionPoint(op);
+      auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
+
+      // Generate the new loop nest
+      mlir::Block *nestBody = nullptr;
+      fir::DoLoopOp outerLoop = nullptr;
+      llvm::SmallVector<mlir::Value> loopIndArgs;
+      for (auto extent : ompOp.getLoopUpperBounds()) {
+        auto ub = builder.createConvert(loc, indexType, extent);
+        auto doLoop = builder.create<fir::DoLoopOp>(loc, one, ub, one, false);
+        nestBody = doLoop.getBody();
+        builder.setInsertionPointToStart(nestBody);
+        // Convert the indices to the type used inside the loop if needed
+        if (oldIndexType != indexType) {
+          auto convertedIndVar = builder.createConvert(
+              loc, oldIndexType, doLoop.getInductionVar());
+          loopIndArgs.push_back(convertedIndVar);
+        } else {
+          loopIndArgs.push_back(doLoop.getInductionVar());
+        }
+        if (!outerLoop)
+          outerLoop = doLoop;
+      }
+
+      // Move the omp loop body into the new loop body
+      if (ompOp->getRegion(0).getBlocks().size() == 1) {
+        auto &block = *ompOp->getRegion(0).getBlocks().begin();
+        rewriter.mergeBlocks(&block, nestBody, loopIndArgs);
+
+        // Find the new loop block terminator and move it before the end of the
+        // block
+        for (auto &loopBodyOp : nestBody->getOperations()) {
+          if (auto resultOp = mlir::dyn_cast<fir::ResultOp>(loopBodyOp)) {
+            rewriter.moveOpBefore(resultOp.getOperation(), &nestBody->back());
+            break;
+          }
+        }
+
+        // Remove omp.yield at the end of the loop body
+        if (auto yieldOp = mlir::dyn_cast<mlir::omp::YieldOp>(nestBody->back()))
+          rewriter.eraseOp(yieldOp);
+        // DoLoopOp does not support multi-block regions, thus if we're dealing
+        // with multiple blocks we need to convert it into basic control-flow
+        // operations.
+      } else {
+        rewriter.inlineRegionBefore(ompOp->getRegion(0), nestBody);
+        auto indVarArg = outerLoop->getRegion(0).front().getArgument(0);
+        // fir::convertDoLoopToCFG expects the induction variable to be of type
+        // index while the OpenMP LoopNestOp can have indices of different
+        // types. We need to work around it.
+        if (indVarArg.getType() != indexType)
+          indVarArg.setType(indexType);
+
+        auto loopBlocks =
+            fir::convertDoLoopToCFG(outerLoop, rewriter, false, false);
+        auto *conditionalBlock = loopBlocks.first;
+        auto *firstBlock =
+            conditionalBlock->getNextNode(); // Start of the loop body
+        auto *lastBlock = loopBlocks.second; // Incrementing induction variables
+
+        // If the induction variable is used within the loop and was originally
+        // not of type index, then we need to add a convert to the original type
+        // and replace its uses inside the loop body.
+        if (oldIndexType != indexType) {
+          indVarArg = conditionalBlock->getArgument(0);
+          builder.setInsertionPointToStart(firstBlock);
+          auto convertedIndVar =
+              builder.createConvert(loc, oldIndexType, indVarArg);
+          rewriter.replaceUsesWithIf(
+              indVarArg, convertedIndVar, [&](auto &use) -> bool {
+                return use.getOwner() != convertedIndVar.getDefiningOp() &&
+                       use.getOwner()->getBlock() != lastBlock;
+              });
+        }
+
+        // There might be an unused convert and an unused argument to the block.
+        // If so, remove them.
+        if (lastBlock->front().getUses().empty())
+          lastBlock->front().erase();
+        for (auto arg : lastBlock->getArguments()) {
+          if (arg.getUses().empty())
+            lastBlock->eraseArgument(arg.getArgNumber());
+        }
+
+        // Any loop blocks that end in omp.yield should just branch to
+        // lastBlock.
+        for (auto *loopBlock = conditionalBlock; loopBlock != lastBlock;
+             loopBlock = loopBlock->getNextNode()) {
+          if (auto yieldOp =
+                  mlir::dyn_cast<mlir::omp::YieldOp>(loopBlock->back())) {
+            builder.setInsertionPointToEnd(loopBlock);
+            builder.create<mlir::cf::BranchOp>(loc, lastBlock);
+            rewriter.eraseOp(yieldOp);
+          }
+        }
+      }
+
+      rewriter.eraseOp(ompOp);
+      return mlir::success();
+    }
+
+    if (auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(op)) {
+      mapInfoOp.getResult().replaceAllUsesWith(mapInfoOp.getVarPtr());
+      rewriter.eraseOp(mapInfoOp);
+      return mlir::success();
+    }
+
+    if (auto atomicReadOp = mlir::dyn_cast<mlir::omp::AtomicReadOp>(op)) {
+      builder.setInsertionPoint(op);
+      auto loadOp = builder.create<fir::LoadOp>(loc, atomicReadOp.getX());
+      auto storeOp = builder.create<fir::StoreOp>(loc, loadOp.getResult(),
+                                                  atomicReadOp.getV());
+      rewriter.replaceOp(op, storeOp);
+      return mlir::success();
+    }
+
+    if (auto atomicWriteOp = mlir::dyn_cast<mlir::omp::AtomicWriteOp>(op)) {
+      auto storeOp = builder.create<fir::StoreOp>(loc, atomicWriteOp.getExpr(),
+                                                  atomicWriteOp.getX());
+      rewriter.replaceOp(op, storeOp);
+      return mlir::success();
+    }
+
+    if (auto atomicUpdateOp = mlir::dyn_cast<mlir::omp::AtomicUpdateOp>(op)) {
+      assert("one block in region" &&
+             atomicUpdateOp.getRegion().getBlocks().size() == 1);
+      auto &block = *atomicUpdateOp.getRegion().getBlocks().begin();
+      builder.setInsertionPointToStart(&block);
+
+      // Load the update `x` operand and replace its uses within the block
+      auto loadOp = builder.create<fir::LoadOp>(loc, atomicUpdateOp.getX());
+      rewriter.replaceUsesWithIf(
+          block.getArgument(0), loadOp.getResult(),
+          [&](auto &op) { return op.get().getParentBlock() == █ });
+
+      // Store the result back into `x` in line with omp.yield semantics for
+      // this op
+      auto yieldOp = mlir::cast<mlir::omp::YieldOp>(block.back());
+      assert("only one yield operand" && yieldOp->getNumOperands() == 1);
+      builder.setInsertionPointAfter(yieldOp);
+      builder.create<fir::StoreOp>(loc, yieldOp->getOperand(0),
+                                   atomicUpdateOp.getX());
+      rewriter.eraseOp(yieldOp);
+
+      // Inline the final block and remove the now-empty op
+      assert("only one block argument" && block.getNumArguments() == 1);
+      block.eraseArguments(0, block.getNumArguments());
+      rewriter.inlineBlockBefore(&block, op, {});
+      rewriter.eraseOp(op);
+      return mlir::success();
+    }
+
+    if (auto threadPrivateOp = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op)) {
+      threadPrivateOp.getTlsAddr().replaceAllUsesWith(
+          threadPrivateOp.getSymAddr());
+      rewriter.eraseOp(threadPrivateOp);
+      return mlir::success();
+    }
+
+    if (inlineSimpleOp(mlir::dyn_cast<mlir::omp::TeamsOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::ParallelOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::SingleOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::SectionOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::SectionsOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::WsloopOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::LoopOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::TargetOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::TargetDataOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::DistributeOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::TaskOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::TaskloopOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::MasterOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::CriticalOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::OrderedRegionOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::AtomicCaptureOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::MaskedOp>(op)))
+      return mlir::success();
+
+    op->emitOpError("OpenMP operation left unhandled after SimdOnly pass.");
+    return mlir::failure();
+  }
+};
+
+class SimdOnlyPass : public flangomp::impl::SimdOnlyPassBase<SimdOnlyPass> {
+
+public:
+  SimdOnlyPass() = default;
+
+  void runOnOperation() override {
+    mlir::func::FuncOp func = getOperation();
----------------
tblah wrote:

What about OpenMP operations which live in module scope e.g. reduction and privatization declarations? These won't lead to any codegen on their own but I think it would be better for this pass to also erase them (it should generally be safe to remove the entire op so long as it is not used inside of an omp.simd clause).

https://github.com/llvm/llvm-project/pull/150269