[flang-commits] [flang] a023fd2 - [Flang][CodeGen] Rematerialize pure ops with impure implementation (#196352)

Wed May 13 06:29:52 PDT 2026

Author: Tom Eccles
Date: 2026-05-13T14:29:47+01:00
New Revision: a023fd290ec084663b4dc482dc0a1d77940628ea

URL: https://github.com/llvm/llvm-project/commit/a023fd290ec084663b4dc482dc0a1d77940628ea
DIFF: https://github.com/llvm/llvm-project/commit/a023fd290ec084663b4dc482dc0a1d77940628ea.diff

LOG: [Flang][CodeGen] Rematerialize pure ops with impure implementation (#196352)

Some NoMemoryEffect FIR ops have an impure implementation in the LLVM
dialect which generates allocas.

Some OpenMP operations have bodies that will be outlined and may be run
on different threads. These OpenMP operations are not IsolatedFromAbove.
Values used directly inside of the regions of these operations are
implicitly shared between threads.

It is important that the allocas generated to implement NoMemoryEffect
FIR ops do not lead to accidental sharing between threads. This commit
adds a fir codegen pass which avoids this by rematerializing these FIR
operations inside of outlinable OpenMP regions.

The problem is not inherent to the OpenMP dialect: every time an MLIR
value is defined above an MLIR region where it is used, the use is
semantically a copy of the value into that region. Making this copy
explicit for all uses would not be practical. Instead, this pass copies
(by rematerializing) only those values for which the copy matters.

Only operations implementing OutlineableOpenMPOpInterface are handled in
this pass because these were the operations which already had special
handling for alloca placement in flang codegen. The pass is designed to
be easy to extend to other operations containing region boundaries which
require this rematerialization.

Similarly, the pass should be easy to extend to other operations which
should be rematerialized at these region boundaries.

Some fir.embox/fir.rebox operations are not speculatable; these are not
susceptible to the original bug because CSE will not hoist them outside
of the outlinable regions. I chose to support these anyway for
completeness and because of the MLIR copying semantics described above.
It is safe to speculate these operations here because the rematerialized
operations are created immediately before where the values would
be used. Furthermore, if a value is used in a nested outlinable region,
that value must always dominate that nested region by SSA construction.

RFC:
https://discourse.llvm.org/t/rfc-make-outlinable-openmp-operations-isolatedfromabove/90565

Fixes: #170781

Assisted-by: Codex

Added: 
    flang/lib/Optimizer/CodeGen/RematerializeFIRBoxOps.cpp
    flang/test/Fir/rematerialize-fir-box-ops-pipeline.fir
    flang/test/Fir/rematerialize-fir-box-ops.fir

Modified: 
    flang/include/flang/Optimizer/CodeGen/CGPasses.td
    flang/include/flang/Optimizer/CodeGen/CodeGen.h
    flang/lib/Optimizer/CodeGen/CMakeLists.txt
    flang/lib/Optimizer/Passes/Pipelines.cpp
    flang/test/Driver/mlir-debug-pass-pipeline.f90
    flang/test/Driver/mlir-pass-pipeline.f90
    flang/test/Fir/basic-program.fir

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 5c39de9041bc3..168463f1514a7 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -62,6 +62,18 @@ def CodeGenRewrite : Pass<"cg-rewrite", "mlir::ModuleOp"> {
   ];
 }
 
+def RematerializeFIRBoxOpsPass
+    : Pass<"fir-rematerialize-box-ops", "mlir::func::FuncOp"> {
+  let summary = "Rematerialize FIR box operations before selected uses.";
+  let description = [{
+    Clone selected FIR box operations to preserve region-local descriptor
+    storage when their results are used inside nested regions.
+  }];
+  let dependentDialects = [
+    "fir::FIROpsDialect", "mlir::omp::OpenMPDialect"
+  ];
+}
+
 def TargetRewritePass : Pass<"target-rewrite", "mlir::ModuleOp"> {
   let summary = "Rewrite some FIR dialect into target specific forms.";
   let description = [{

diff  --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index e9a07a8dde5cd..b7a9397edfe6d 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -25,6 +25,7 @@ struct NameUniquer;
 
 #define GEN_PASS_DECL_FIRTOLLVMLOWERING
 #define GEN_PASS_DECL_CODEGENREWRITE
+#define GEN_PASS_DECL_REMATERIALIZEFIRBOXOPSPASS
 #define GEN_PASS_DECL_TARGETREWRITEPASS
 #define GEN_PASS_DECL_BOXEDPROCEDUREPASS
 #define GEN_PASS_DECL_LOWERREPACKARRAYSPASS

diff  --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
index fd116fbea0f0e..69e1bfc4b8081 100644
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(FIRCodeGen
   LLVMInsertChainFolder.cpp
   LowerRepackArrays.cpp
   PreCGRewrite.cpp
+  RematerializeFIRBoxOps.cpp
   TBAABuilder.cpp
   Target.cpp
   TargetRewrite.cpp

diff  --git a/flang/lib/Optimizer/CodeGen/RematerializeFIRBoxOps.cpp b/flang/lib/Optimizer/CodeGen/RematerializeFIRBoxOps.cpp
new file mode 100644
index 0000000000000..4e3410d569e8b
--- /dev/null
+++ b/flang/lib/Optimizer/CodeGen/RematerializeFIRBoxOps.cpp
@@ -0,0 +1,220 @@
+//===-- RematerializeFIRBoxOps.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Some NoMemoryEffect FIR operations create allocas as an implementation detail
+// of their conversion to the LLVM dialect. These allocas must not be
+// accidentally shared across different threads when OpenMP outlining is used.
+// This pass rematerializes selected operations into the outlined regions,
+// which ensures that the allocas are correctly located inside of the outlined
+// function.
+//
+// Operations rematerialized by this pass are re-created at each use inside of
+// the affected regions. LLVM-IR CSE later in the pipeline should merge these
+// where possible. However, CSE must not be run between this pass and the
+// FIR-to-LLVM conversion because MLIR CSE will completely undo the actions of
+// this pass. This is because the side effects on the FIR operations do not
+// represent the side effects produced by their implementation in the LLVM
+// dialect. This pass makes FIR-to-LLVM descriptor allocation safe for outlined
+// regions.
+
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir {
+#define GEN_PASS_DEF_REMATERIALIZEFIRBOXOPSPASS
+#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
+} // namespace fir
+
+namespace {
+
+/// Returns true if \p op has a region that should be rematerialized into.
+static bool isRematerializationRegionOp(mlir::Operation *op) {
+  return mlir::isa<mlir::omp::OutlineableOpenMPOpInterface>(op);
+}
+
+/// Return true for FIR box/class conversions whose operand may need to be
+/// rematerialized while preserving the type expected by the use.
+static bool isBoxConvert(mlir::Operation *op) {
+  auto convert = mlir::dyn_cast_or_null<fir::ConvertOp>(op);
+  return convert && mlir::isa<fir::BaseBoxType>(convert.getValue().getType()) &&
+         mlir::isa<fir::BaseBoxType>(convert.getType());
+}
+
+/// Return true if \p op should be cloned into rematerialization regions.
+static bool shouldRematerialize(mlir::Operation *op) {
+  if (!op)
+    return false;
+
+  if (mlir::isa<fir::EmboxOp, fir::ReboxOp>(op)) {
+    assert(
+        mlir::isMemoryEffectFree(op) &&
+        "This transformation is not safe for operations with memory effects");
+    // Not all Embox and Rebox operations are speculatable. This should be safe
+    // because SSA can only express a use of the non-speculatable value inside
+    // of the region requiring rematerialization if that non-speculatable value
+    // always dominates the region. Therefore we are not adding any new UB from
+    // absent boxes/arguments. Furthermore, the newly rematerialized operations
+    // are created at the use site of the original value as a further guarantee
+    // that the rematerializations are only executed if the original value was
+    // executed.
+    return true;
+  }
+
+  // Rematerializing box-to-box conversions is safe and allows rematerialization
+  // of emboxes which are only used inside of the region through box-to-box
+  // conversions. Conversions to !fir.box<none> are common before calls to
+  // runtime functions.
+  if (isBoxConvert(op)) {
+    assert(
+        mlir::isPure(op) &&
+        "This transformation is not safe for operations with memory effects");
+    return shouldRematerialize(
+        mlir::cast<fir::ConvertOp>(op).getValue().getDefiningOp());
+  }
+
+  return false;
+}
+
+/// Return true if \p definingOp can be rematerialized into \p useRegion.
+/// The use can be rematerialized if the defining operation is located
+/// in an ancestor region of the use and the definition operation is
+/// rematerializable.
+static bool canRematerializeDefInRegion(mlir::Operation *definingOp,
+                                        mlir::Region &useRegion) {
+  if (!definingOp || !shouldRematerialize(definingOp))
+    return false;
+
+  mlir::Region *opRegion = definingOp->getParentRegion();
+  for (mlir::Region *ancestor = useRegion.getParentRegion(); ancestor;
+       ancestor = ancestor->getParentRegion()) {
+    if (opRegion == ancestor)
+      return true;
+  }
+  return false;
+}
+
+static mlir::Value cloneRematerializedValue(
+    mlir::IRRewriter &rewriter, mlir::Region &useRegion, mlir::Value value,
+    mlir::IRMapping &mapping,
+    llvm::SmallVectorImpl<mlir::Operation *> &eraseCandidates) {
+  if (mlir::Value mappedValue = mapping.lookupOrNull(value))
+    return mappedValue;
+
+  mlir::Operation *definingOp = value.getDefiningOp();
+  if (!canRematerializeDefInRegion(definingOp, useRegion))
+    return value;
+
+  // Clone rematerializable dependencies first so the cloned operation uses
+  // cloned operands when possible.
+  for (mlir::Value operand : definingOp->getOperands())
+    cloneRematerializedValue(rewriter, useRegion, operand, mapping,
+                             eraseCandidates);
+
+  rewriter.clone(*definingOp, mapping);
+  eraseCandidates.push_back(definingOp);
+  return mapping.lookup(value);
+}
+
+struct RematerializationSite {
+  /// Operation whose operands will be rewritten. This is the insertion point
+  /// for the rematerialized operations.
+  mlir::Operation *user;
+  /// Operands of \c user that use values needing rematerialization.
+  llvm::SmallVector<mlir::OpOperand *> uses;
+
+  RematerializationSite(mlir::Operation *user,
+                        llvm::SmallVector<mlir::OpOperand *> uses)
+      : user(user), uses(std::move(uses)) {}
+};
+
+/// Walk \p regionOwner and collect all rematerializable operands that use
+/// values defined outside the region.
+static void collectRematerializableUses(
+    mlir::Operation *regionOwner,
+    llvm::SmallVectorImpl<RematerializationSite> &rematSites) {
+  regionOwner->walk<mlir::WalkOrder::PreOrder>(
+      [&](mlir::Operation *op) -> mlir::WalkResult {
+        // Don't walk into nested rematerialization regions. They will be
+        // processed in their own calls to this function.
+        if (op != regionOwner && isRematerializationRegionOp(op))
+          return mlir::WalkResult::skip();
+
+        llvm::SmallVector<mlir::OpOperand *> rematerializableUses;
+        for (mlir::OpOperand &operand : op->getOpOperands()) {
+          if (canRematerializeDefInRegion(operand.get().getDefiningOp(),
+                                          *op->getParentRegion()))
+            rematerializableUses.push_back(&operand);
+        }
+        if (!rematerializableUses.empty())
+          rematSites.emplace_back(op, std::move(rematerializableUses));
+
+        return mlir::WalkResult::advance();
+      });
+}
+
+/// Rematerialize supported values defined outside of \p regionOwner into the
+/// region
+static void rematerializeInRegion(mlir::IRRewriter &rewriter,
+                                  mlir::Operation *regionOwner) {
+  llvm::SmallVector<RematerializationSite> rematSites;
+  collectRematerializableUses(regionOwner, rematSites);
+  if (rematSites.empty())
+    return;
+
+  mlir::OpBuilder::InsertionGuard guard(rewriter);
+
+  llvm::SmallVector<mlir::Operation *> eraseCandidates;
+  for (RematerializationSite &rematSite : rematSites) {
+    rewriter.setInsertionPoint(rematSite.user);
+    mlir::IRMapping mapping;
+    for (mlir::OpOperand *use : rematSite.uses) {
+      mlir::Region *useRegion = use->getOwner()->getParentRegion();
+      mlir::Value newValue = cloneRematerializedValue(
+          rewriter, *useRegion, use->get(), mapping, eraseCandidates);
+      use->set(newValue);
+    }
+  }
+
+  llvm::DenseSet<mlir::Operation *> erased;
+  for (mlir::Operation *op : llvm::reverse(eraseCandidates))
+    if (erased.insert(op).second && op->use_empty())
+      rewriter.eraseOp(op);
+}
+
+class RematerializeFIRBoxOpsPass
+    : public fir::impl::RematerializeFIRBoxOpsPassBase<
+          RematerializeFIRBoxOpsPass> {
+public:
+  using RematerializeFIRBoxOpsPassBase<
+      RematerializeFIRBoxOpsPass>::RematerializeFIRBoxOpsPassBase;
+
+  void runOnOperation() override final {
+    mlir::Operation *top = getOperation();
+
+    llvm::SmallVector<mlir::Operation *> regionOwners;
+    top->walk([&](mlir::Operation *op) {
+      if (isRematerializationRegionOp(op))
+        regionOwners.push_back(op);
+    });
+
+    mlir::IRRewriter rewriter(top->getContext());
+    for (mlir::Operation *op : regionOwners)
+      rematerializeInRegion(rewriter, op);
+  }
+};
+
+} // namespace

diff  --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 920d6f86a355e..18fc9da7de80c 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -398,6 +398,11 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
       pm, fir::createAbstractResultOpt);
   addPassToGPUModuleOperations<PassConstructor>(pm,
                                                 fir::createAbstractResultOpt);
+  pm.addPass(fir::createRematerializeFIRBoxOpsPass());
+  // Do not run CSE between rematerialization and FIR-to-LLVM lowering. CSE will
+  // undo the createRematerializeFIRBoxOps pass.
+  // LLVM-level CSE can clean up redundant operations after FIR box conversion
+  // has materialized region-local allocas.
   fir::addCodeGenRewritePass(
       pm, (config.DebugInfo != llvm::codegenoptions::NoDebugInfo));
   fir::addExternalNameConversionPass(pm, config.Underscoring);

diff  --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90
index 3f6bde2ded67b..24eda49e86f88 100644
--- a/flang/test/Driver/mlir-debug-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90
@@ -115,6 +115,7 @@
 ! ALL-NEXT:     AbstractResultOpt
 ! ALL-NEXT:   'func.func' Pipeline
 ! ALL-NEXT:     AbstractResultOpt
+! ALL-NEXT:     RematerializeFIRBoxOpsPass
 ! ALL-NEXT:   'gpu.module' Pipeline
 ! ALL-NEXT:   Pipeline Collection : ['func.func', 'gpu.func']
 ! ALL-NEXT:   'func.func' Pipeline

diff  --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 630076a7947ff..3bb06c678657e 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -168,6 +168,7 @@
 ! ALL-NEXT:    AbstractResultOpt
 ! ALL-NEXT:  'func.func' Pipeline
 ! ALL-NEXT:    AbstractResultOpt
+! ALL-NEXT:    RematerializeFIRBoxOpsPass
 ! ALL-NEXT:  'gpu.module' Pipeline
 ! ALL-NEXT:   Pipeline Collection : ['func.func', 'gpu.func']
 ! ALL-NEXT:   'func.func' Pipeline

diff  --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index 1e26b388267b6..d76fbc3df0704 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -156,6 +156,7 @@ func.func @_QQmain() {
 // PASSES-NEXT:    AbstractResultOpt
 // PASSES-NEXT:  'func.func' Pipeline
 // PASSES-NEXT:    AbstractResultOpt
+// PASSES-NEXT:    RematerializeFIRBoxOpsPass
 // PASSES-NEXT:  'gpu.module' Pipeline
 // PASSES-NEXT:   Pipeline Collection : ['func.func', 'gpu.func']
 // PASSES-NEXT:   'func.func' Pipeline

diff  --git a/flang/test/Fir/rematerialize-fir-box-ops-pipeline.fir b/flang/test/Fir/rematerialize-fir-box-ops-pipeline.fir
new file mode 100644
index 0000000000000..4c5414a3b8656
--- /dev/null
+++ b/flang/test/Fir/rematerialize-fir-box-ops-pipeline.fir
@@ -0,0 +1,32 @@
+// RUN: tco -test-gen %s | FileCheck %s
+
+// Test the real FIR-to-LLVM pipeline. In particular, this guards against
+// adding a FIR-level CSE-like pass between fir-rematerialize-box-ops and
+// FIR-to-LLVM lowering, which could merge FIR box operations across OpenMP
+// regions before their descriptor allocas are materialized.
+
+func.func private @use_box(!fir.box<!fir.array<?xi32>>)
+
+func.func @box_used_in_sibling_tasks(%arg0: !fir.ref<!fir.array<?xi32>>, %n: index) {
+  %shape = fir.shape %n : (index) -> !fir.shape<1>
+  %box = fir.embox %arg0(%shape) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+  omp.task {
+    fir.call @use_box(%box) : (!fir.box<!fir.array<?xi32>>) -> ()
+    omp.terminator
+  }
+  omp.task {
+    fir.call @use_box(%box) : (!fir.box<!fir.array<?xi32>>) -> ()
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: llvm.func @box_used_in_sibling_tasks(
+// CHECK:       omp.task {
+// CHECK:         llvm.alloca
+// CHECK:         llvm.call @use_box
+// CHECK:         omp.terminator
+// CHECK:       omp.task {
+// CHECK:         llvm.alloca
+// CHECK:         llvm.call @use_box
+// CHECK:         omp.terminator

diff  --git a/flang/test/Fir/rematerialize-fir-box-ops.fir b/flang/test/Fir/rematerialize-fir-box-ops.fir
new file mode 100644
index 0000000000000..2cde6dccac5c3
--- /dev/null
+++ b/flang/test/Fir/rematerialize-fir-box-ops.fir
@@ -0,0 +1,142 @@
+// RUN: fir-opt --split-input-file --fir-rematerialize-box-ops %s | FileCheck %s
+
+// The dummy constants inside regions give FileCheck stable anchors for the
+// rematerialized operations' insertion points.
+
+func.func private @use_box_10(!fir.box<!fir.array<10xi32>>)
+func.func private @use_box_dyn(!fir.box<!fir.array<?xi32>>)
+
+// CHECK-LABEL: func.func @embox_mixed_uses(
+func.func @embox_mixed_uses(%arg0: !fir.ref<!fir.array<10xi32>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+  // CHECK: %[[OUTER_BOX:.*]] = fir.embox
+  %box = fir.embox %arg0(%shape) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+  // CHECK: fir.call @use_box_10(%[[OUTER_BOX]])
+  fir.call @use_box_10(%box) : (!fir.box<!fir.array<10xi32>>) -> ()
+  // CHECK: omp.task {
+  omp.task {
+    // CHECK: arith.constant 0 : i32
+    %c0 = arith.constant 0 : i32
+    // CHECK-NEXT: %[[TASK_BOX:.*]] = fir.embox
+    // CHECK-NEXT: fir.call @use_box_10(%[[TASK_BOX]])
+    fir.call @use_box_10(%box) : (!fir.box<!fir.array<10xi32>>) -> ()
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func.func private @use_box_10(!fir.box<!fir.array<10xi32>>)
+
+// CHECK-LABEL: func.func @rebox_chain(
+func.func @rebox_chain(%arg0: !fir.ref<!fir.array<10xi32>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+  %box = fir.embox %arg0(%shape) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+  %rebox = fir.rebox %box : (!fir.box<!fir.array<10xi32>>) -> !fir.box<!fir.array<10xi32>>
+  // CHECK: omp.task {
+  omp.task {
+    // CHECK: arith.constant 0 : i32
+    %c0 = arith.constant 0 : i32
+    // CHECK-NEXT: %[[TASK_BOX:.*]] = fir.embox
+    // CHECK-NEXT: %[[TASK_REBOX:.*]] = fir.rebox %[[TASK_BOX]]
+    // CHECK-NEXT: fir.call @use_box_10(%[[TASK_REBOX]])
+    fir.call @use_box_10(%rebox) : (!fir.box<!fir.array<10xi32>>) -> ()
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func.func private @use_box_10(!fir.box<!fir.array<10xi32>>)
+
+// CHECK-LABEL: func.func @nested_regions(
+func.func @nested_regions(%arg0: !fir.ref<!fir.array<10xi32>>) {
+  %c10 = arith.constant 10 : index
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+  %box = fir.embox %arg0(%shape) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+  // CHECK: omp.parallel {
+  omp.parallel {
+    // CHECK: arith.constant 0 : i32
+    %c0 = arith.constant 0 : i32
+    // CHECK-NEXT: %[[PAR_BOX:.*]] = fir.embox
+    // CHECK-NEXT: fir.call @use_box_10(%[[PAR_BOX]])
+    fir.call @use_box_10(%box) : (!fir.box<!fir.array<10xi32>>) -> ()
+    // CHECK: omp.task {
+    omp.task {
+      // CHECK: arith.constant 1 : i32
+      %c1 = arith.constant 1 : i32
+      // CHECK-NEXT: %[[TASK_BOX:.*]] = fir.embox
+      // CHECK-NEXT: fir.call @use_box_10(%[[TASK_BOX]])
+      fir.call @use_box_10(%box) : (!fir.box<!fir.array<10xi32>>) -> ()
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func.func private @use_box_dyn(!fir.box<!fir.array<?xi32>>)
+
+// CHECK-LABEL: func.func @rebox_maybe_absent_box(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>
+func.func @rebox_maybe_absent_box(%arg0: !fir.box<!fir.array<?xi32>>) {
+  %rebox = fir.rebox %arg0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+  // CHECK: omp.task {
+  omp.task {
+    // CHECK: arith.constant 0 : i32
+    %c0 = arith.constant 0 : i32
+    // CHECK-NEXT: %[[TASK_REBOX:.*]] = fir.rebox %[[ARG0]]
+    // CHECK-NEXT: fir.call @use_box_dyn(%[[TASK_REBOX]])
+    fir.call @use_box_dyn(%rebox) : (!fir.box<!fir.array<?xi32>>) -> ()
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func.func private @use_class_none(!fir.class<none>)
+
+// CHECK-LABEL: func.func @embox_maybe_absent_source_box(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<none>, %[[SOURCE_BOX:.*]]: !fir.class<!fir.array<?xnone>>
+func.func @embox_maybe_absent_source_box(%arg0: !fir.ref<none>, %arg1: !fir.class<!fir.array<?xnone>>) {
+  %box = fir.embox %arg0 source_box %arg1 : (!fir.ref<none>, !fir.class<!fir.array<?xnone>>) -> !fir.class<none>
+  // CHECK: omp.task {
+  omp.task {
+    // CHECK: arith.constant 0 : i32
+    %c0 = arith.constant 0 : i32
+    // CHECK-NEXT: %[[TASK_EMBOX:.*]] = fir.embox %[[ARG0]] source_box %[[SOURCE_BOX]]
+    // CHECK-NEXT: fir.call @use_class_none(%[[TASK_EMBOX]])
+    fir.call @use_class_none(%box) : (!fir.class<none>) -> ()
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func.func private @use_box_none(!fir.box<none>)
+
+// CHECK-LABEL: func.func @converted_embox_live_in(
+func.func @converted_embox_live_in(%arg0: !fir.ref<!fir.array<?xi32>>, %n: index) {
+  %shape = fir.shape %n : (index) -> !fir.shape<1>
+  %box = fir.embox %arg0(%shape) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+  %converted = fir.convert %box : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+  // CHECK: omp.task {
+  omp.task {
+    // CHECK: arith.constant 0 : i32
+    %c0 = arith.constant 0 : i32
+    // CHECK-NEXT: %[[TASK_BOX:.*]] = fir.embox
+    // CHECK-NEXT: %[[TASK_CONVERT:.*]] = fir.convert %[[TASK_BOX]]
+    // CHECK-NEXT: fir.call @use_box_none(%[[TASK_CONVERT]])
+    fir.call @use_box_none(%converted) : (!fir.box<none>) -> ()
+    omp.terminator
+  }
+  return
+}