[llvm-branch-commits] [flang] [Flang][OpenMP] Add combined construct information (PR #198783)

Wed May 20 06:39:16 PDT 2026

llvmorg-github-actions[bot] wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Sergio Afonso (skatrak)

<details>
<summary>Changes</summary>

This patch adds the `omp.combined` attribute to OpenMP dialect operations following changes to the `ComposableOpInterface`.

This attribute is added to operations representing non-innermost leaf constructs of a combined construct, and to standalone block-associated constructs whose only immediately nested construct could be combined with them.

Changes are made to the OpenMP lowering logic, as well as the do-concurrent, workshare and workdistribute transformation passes.

---

The patch is 53.92 KiB and is truncated to 20.00 KiB below; the full version is available at: https://github.com/llvm/llvm-project/pull/198783.diff


33 Files Affected:

- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+56-20) 
- (modified) flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp (+6-2) 
- (modified) flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp (+5-1) 
- (modified) flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp (+7) 
- (modified) flang/test/Integration/OpenMP/workshare-array-array-assign.f90 (+2-1) 
- (modified) flang/test/Integration/OpenMP/workshare-axpy.f90 (+2-1) 
- (modified) flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90 (+2-1) 
- (modified) flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 (+2-1) 
- (modified) flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 (+4) 
- (added) flang/test/Lower/OpenMP/compound.f90 (+1094) 
- (modified) flang/test/Lower/OpenMP/multiple-entry-points.f90 (+1) 
- (modified) flang/test/Lower/OpenMP/workshare.f90 (+3-3) 
- (modified) flang/test/Transforms/DoConcurrent/basic_device.f90 (+3-3) 
- (modified) flang/test/Transforms/DoConcurrent/basic_host.f90 (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/basic_host.mlir (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/local_device.mlir (+5-5) 
- (modified) flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/non_const_bounds.f90 (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/reduce_add.mlir (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/reduce_device.mlir (+4-4) 
- (modified) flang/test/Transforms/DoConcurrent/reduce_device_min.f90 (+2-2) 
- (modified) flang/test/Transforms/DoConcurrent/reduce_local.mlir (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/reduction_symbol_resultion.f90 (+2-2) 
- (modified) flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 (+1-1) 
- (modified) flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 (+6-6) 
- (modified) flang/test/Transforms/OpenMP/lower-workdistribute-doloop.mlir (+1-1) 
- (modified) flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir (+2-2) 
- (modified) flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir (+2-2) 
- (modified) flang/test/Transforms/OpenMP/lower-workdistribute-fission.mlir (+1-1) 
- (modified) flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir (+2-2) 
- (modified) flang/test/Transforms/omp-function-filtering-todo.mlir (+2-2) 


``````````diff

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 3db1ec256ea73..f7e26bb11bc99 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -68,6 +68,25 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
                            const ConstructQueue &queue,
                            ConstructQueue::const_iterator item);
 
+/// Return the directive that is immediately nested inside of the given
+/// \c parent evaluation, if it is its only non-end-statement nested evaluation
+/// and it represents an OpenMP construct.
+lower::pft::Evaluation *
+extractOnlyOmpNestedEval(lower::pft::Evaluation &parent) {
+  if (!parent.hasNestedEvaluations())
+    return nullptr;
+
+  auto &nested{parent.getFirstNestedEvaluation()};
+  if (!nested.isA<parser::OpenMPConstruct>())
+    return nullptr;
+
+  for (auto &sibling : parent.getNestedEvaluations())
+    if (&sibling != &nested && !sibling.isEndStmt())
+      return nullptr;
+
+  return &nested;
+}
+
 namespace {
 /// Structure holding information that is needed to pass host-evaluated
 /// information to later lowering stages.
@@ -298,25 +317,6 @@ class OpenMPPatternProcessor {
     }
   }
 
-  /// Return the directive that is immediately nested inside of the given
-  /// \c parent evaluation, if it is its only non-end-statement nested
-  /// evaluation and it represents an OpenMP construct.
-  lower::pft::Evaluation *
-  extractOnlyOmpNestedEval(lower::pft::Evaluation &parent) {
-    if (!parent.hasNestedEvaluations())
-      return nullptr;
-
-    auto &nested{parent.getFirstNestedEvaluation()};
-    if (!nested.isA<parser::OpenMPConstruct>())
-      return nullptr;
-
-    for (auto &sibling : parent.getNestedEvaluations())
-      if (&sibling != &nested && !sibling.isEndStmt())
-        return nullptr;
-
-    return &nested;
-  }
-
 protected:
   semantics::SemanticsContext &semaCtx;
 
@@ -4033,7 +4033,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     // Lowered in the enclosing genSectionsOp.
     break;
   case llvm::omp::Directive::OMPD_sections:
-    genSectionsOp(converter, symTable, semaCtx, eval, loc, queue, item);
+    newOp = genSectionsOp(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_simd:
     newOp =
@@ -4120,6 +4120,42 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
   finalizeStmtCtx();
   if (loopLeaf)
     symTable.popScope();
+
+  // Add the omp.combined attribute to eligible ops. In this case, all
+  // composable ops that are not loop-associated, except for the ones that can
+  // only appear as the innermost leaf construct.
+  if (!loopLeaf &&
+      llvm::isa_and_present<mlir::omp::ComposableOpInterface>(newOp) &&
+      !llvm::isa<mlir::omp::SectionsOp, mlir::omp::WorkshareOp,
+                 mlir::omp::WorkdistributeOp>(newOp)) {
+    bool isCombined = false;
+    if (std::next(item) != queue.end()) {
+      // Non-innermost leafs of a combined construct must always hold the
+      // attribute.
+      isCombined = true;
+    } else if (lower::pft::Evaluation *nestedEval =
+                   extractOnlyOmpNestedEval(eval)) {
+      // Combinable constructs that are immediately nested with no other
+      // statements or directives preventing them from being combined need the
+      // attribute as well.
+      OmpDirectiveSet combinableDirs =
+          (llvm::omp::blockConstructSet &
+           ~OmpDirectiveSet{llvm::omp::Directive::OMPD_ordered,
+                            llvm::omp::Directive::OMPD_scope,
+                            llvm::omp::Directive::OMPD_taskgroup}) |
+          (llvm::omp::loopConstructSet & ~llvm::omp::loopTransformationSet);
+      const auto &ompEval = nestedEval->get<parser::OpenMPConstruct>();
+      llvm::omp::Directive nestedDir =
+          parser::omp::GetOmpDirectiveName(ompEval).v;
+      llvm::omp::Directive firstLeafDir =
+          llvm::omp::getLeafConstructsOrSelf(nestedDir).front();
+
+      if (combinableDirs.test(firstLeafDir))
+        isCombined = true;
+    }
+    if (isCombined)
+      llvm::cast<mlir::omp::ComposableOpInterface>(newOp).setCombined(true);
+  }
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index 244c462ed93f7..b30c87cb9160b 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -326,14 +326,18 @@ class DoConcurrentConversion
       targetOp =
           genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns,
                       targetClauseOps, loopNestClauseOps, liveInShapeInfoMap);
-      genTeamsOp(rewriter, loop, mapper);
+      auto teamsOp = genTeamsOp(rewriter, loop, mapper);
+      targetOp.setCombined(true);
+      teamsOp.setCombined(true);
     }
 
     mlir::omp::ParallelOp parallelOp =
         genParallelOp(rewriter, loop, ivInfos, mapper);
 
-    // Only set as composite when part of `distribute parallel do`.
+    // Only set as composite when part of `distribute parallel do`, and only set
+    // as combined when part of `parallel do`.
     parallelOp.setComposite(mapToDevice);
+    parallelOp.setCombined(!mapToDevice);
 
     if (!mapToDevice)
       genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, loopNestClauseOps);
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp
index 6f2e398b549f0..84ac1bec4d98c 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp
@@ -274,8 +274,10 @@ fissionWorkdistribute(omp::WorkdistributeOp workdistribute) {
     }
 
     if (parallelize && hoisted.empty() &&
-        parallelize->getNextNode() == terminator)
+        parallelize->getNextNode() == terminator) {
+      teams.setCombined(true);
       break;
+    }
     if (parallelize) {
       auto newTeams = rewriter.cloneWithoutRegions(teams);
       auto *newTeamsBlock = rewriter.createBlock(
@@ -290,6 +292,7 @@ fissionWorkdistribute(omp::WorkdistributeOp workdistribute) {
       parallelize->replaceAllUsesWith(cloned);
       parallelize->erase();
       omp::TerminatorOp::create(rewriter, loc);
+      newTeams.setCombined(true);
       changed = true;
     }
   }
@@ -1582,6 +1585,7 @@ genIsolatedTargetOp(omp::TargetOp targetOp, SmallVector<Value> &postMapOperands,
       targetOp.getPrivateMapsAttr(),
       omp::TargetExecModeAttr::get(targetOp->getContext(),
                                    omp::TargetExecMode::spmd));
+  isolatedTargetOp.setCombined(true);
   auto *isolatedTargetBlock =
       rewriter.createBlock(&isolatedTargetOp.getRegion(),
                            isolatedTargetOp.getRegion().begin(), {}, {});
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index a41d8d8826501..b8231bc35c999 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -612,6 +612,13 @@ LogicalResult lowerWorkshare(mlir::omp::WorkshareOp wsOp, DominanceInfo &di) {
     term->erase();
     newOp->erase();
     wsOp->erase();
+
+    // If this was part of a combined construct (e.g. 'parallel workshare'), the
+    // changes we just made to the region can be incompatible with a combined
+    // construct, such as containing multiple block-associated constructs in it.
+    if (auto parentOp =
+            dyn_cast<omp::ComposableOpInterface>(parentBlock->getParentOp()))
+      parentOp.setCombined(false);
   } else {
     // Otherwise just change the operation to an omp.single.
 
diff --git a/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90
index e9ec5d9175beb..3ccb46ebeebea 100644
--- a/flang/test/Integration/OpenMP/workshare-array-array-assign.f90
+++ b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90
@@ -23,7 +23,7 @@ subroutine sb1(x, y)
 ! HLFIR:         omp.terminator
 ! HLFIR:       }
 ! HLFIR:       omp.terminator
-! HLFIR:     }
+! HLFIR:     } {omp.combined}
 
 ! FIR:     omp.parallel {
 ! FIR:       omp.wsloop nowait {
@@ -32,3 +32,4 @@ subroutine sb1(x, y)
 ! FIR:       omp.barrier
 ! FIR:       omp.terminator
 ! FIR:     }
+! FIR-NOT: omp.combined
diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90
index 12246e54d3432..bcf8c2d2c8ed4 100644
--- a/flang/test/Integration/OpenMP/workshare-axpy.f90
+++ b/flang/test/Integration/OpenMP/workshare-axpy.f90
@@ -32,7 +32,7 @@ subroutine sb1(a, x, y, z)
 ! HLFIR:      }
 ! HLFIR-NOT:  omp.barrier
 ! HLFIR:      omp.terminator
-! HLFIR:    }
+! HLFIR:    } {omp.combined}
 ! HLFIR:    return
 ! HLFIR:  }
 ! HLFIR:}
@@ -55,3 +55,4 @@ subroutine sb1(a, x, y, z)
 ! FIR:      omp.barrier
 ! FIR:      omp.terminator
 ! FIR:    }
+! FIR-NOT:omp.combined
diff --git a/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90 b/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90
index 88d1062b091bf..e841213c2f1bf 100644
--- a/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90
+++ b/flang/test/Integration/OpenMP/workshare-forall-sliced-array.f90
@@ -36,7 +36,7 @@ subroutine workshare_forall_sliced(a1)
 ! HLFIR:           omp.terminator
 ! HLFIR:         }
 ! HLFIR:         omp.terminator
-! HLFIR:       }
+! HLFIR:       } {omp.combined}
 
 ! After workshare lowering, the forall should be in omp.single (since it
 ! contains operations that are not safe to parallelize across threads).
@@ -51,6 +51,7 @@ subroutine workshare_forall_sliced(a1)
 ! FIR:         omp.barrier
 ! FIR:         omp.terminator
 ! FIR:       }
+! FIR-NOT:   omp.combined
 
 ! Verify LLVM IR is generated successfully (the original issue caused crashes)
 ! LLVM-LABEL: define {{.*}}workshare_forall_sliced
diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
index 6c180cd639997..43e6cc4bef7b7 100644
--- a/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
+++ b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
@@ -24,7 +24,7 @@ subroutine sb1(a, x)
 ! HLFIR:         omp.terminator
 ! HLFIR:       }
 ! HLFIR:       omp.terminator
-! HLFIR:     }
+! HLFIR:     } {omp.combined}
 
 ! FIR:     omp.parallel {
 ! FIR:       %[[SCALAR_ALLOCA:.*]] = fir.alloca i32
@@ -43,3 +43,4 @@ subroutine sb1(a, x)
 ! FIR:       }
 ! FIR:       omp.barrier
 ! FIR:       omp.terminator
+! FIR-NOT:   omp.combined
diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
index 9b8ef66b48f47..1a9c9a031d9c4 100644
--- a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
+++ b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
@@ -26,12 +26,14 @@ program test
 ! HLFIR-O3:        hlfir.destroy
 ! HLFIR-O3:        omp.terminator
 ! HLFIR-O3:      omp.terminator
+! HLFIR-O3:    omp.combined
 
 ! FIR-O3:    omp.parallel {
 ! FIR-O3:      omp.wsloop nowait {
 ! FIR-O3:        omp.loop_nest
 ! FIR-O3:      omp.barrier
 ! FIR-O3:      omp.terminator
+! FIR-O3-NOT:omp.combined
 
 ! HLFIR-O0:    omp.parallel {
 ! HLFIR-O0:      omp.workshare {
@@ -40,6 +42,7 @@ program test
 ! HLFIR-O0:        hlfir.destroy
 ! HLFIR-O0:        omp.terminator
 ! HLFIR-O0:      omp.terminator
+! HLFIR-O0:    omp.combined
 
 ! Check the copyprivate copy function
 ! FIR-O0:  func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
@@ -63,3 +66,4 @@ program test
 ! FIR-O0:        omp.terminator
 ! FIR-O0:      omp.barrier
 ! FIR-O0:      omp.terminator
+! FIR-O0-NOT:omp.combined
diff --git a/flang/test/Lower/OpenMP/compound.f90 b/flang/test/Lower/OpenMP/compound.f90
new file mode 100644
index 0000000000000..d61745345a640
--- /dev/null
+++ b/flang/test/Lower/OpenMP/compound.f90
@@ -0,0 +1,1094 @@
+! This test checks lowering of compound (combined and composite) constructs.
+! Specifically, it makes sure that the proper ComposableOpInterface attributes
+! are set.
+
+! RUN: bbc -fopenmp -fopenmp-version=60 -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+! ------------------------------------------------------------------------------
+! COMPOSITE CONSTRUCTS
+! ------------------------------------------------------------------------------
+
+subroutine distribute_parallel_do()
+  implicit none
+  integer :: i
+
+  !$omp teams
+  !$omp distribute parallel do
+  do i=1, 10
+  end do
+  !$omp end teams
+end subroutine
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do
+! CHECK:         omp.parallel
+! CHECK:           omp.distribute
+! CHECK-NEXT:        omp.wsloop
+! CHECK-NEXT:          omp.loop_nest
+! CHECK:                 omp.yield
+! CHECK-NEXT:          }
+! CHECK-NEXT:        } {{{.*}}omp.composite{{.*}}}
+! CHECK-NEXT:      } {{{.*}}omp.composite{{.*}}}
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.composite{{.*}}}
+
+subroutine distribute_parallel_do_simd()
+  implicit none
+  integer :: i
+
+  !$omp teams
+  !$omp distribute parallel do simd
+  do i=1, 10
+  end do
+  !$omp end teams
+end subroutine
+
+! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd
+! CHECK:         omp.parallel
+! CHECK:           omp.distribute
+! CHECK-NEXT:        omp.wsloop
+! CHECK-NEXT:          omp.simd
+! CHECK-NEXT:            omp.loop_nest
+! CHECK:                   omp.yield
+! CHECK-NEXT:            }
+! CHECK-NEXT:          } {{{.*}}omp.composite{{.*}}}
+! CHECK-NEXT:        } {{{.*}}omp.composite{{.*}}}
+! CHECK-NEXT:      } {{{.*}}omp.composite{{.*}}}
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.composite{{.*}}}
+
+subroutine distribute_simd()
+  implicit none
+  integer :: i
+
+  !$omp teams
+  !$omp distribute simd
+  do i=1, 10
+  end do
+  !$omp end teams
+end subroutine
+
+! CHECK-LABEL: func.func @_QPdistribute_simd
+! CHECK:         omp.distribute
+! CHECK-NEXT:      omp.simd
+! CHECK-NEXT:        omp.loop_nest
+! CHECK:               omp.yield
+! CHECK-NEXT:        }
+! CHECK-NEXT:      } {{{.*}}omp.composite{{.*}}}
+! CHECK-NEXT:    } {{{.*}}omp.composite{{.*}}}
+
+subroutine do_simd()
+  implicit none
+  integer :: i
+
+  !$omp do simd
+  do i=1, 10
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPdo_simd
+! CHECK:         omp.wsloop
+! CHECK-NEXT:      omp.simd
+! CHECK-NEXT:        omp.loop_nest
+! CHECK:               omp.yield
+! CHECK-NEXT:        }
+! CHECK-NEXT:      } {{{.*}}omp.composite{{.*}}}
+! CHECK-NEXT:    } {{{.*}}omp.composite{{.*}}}
+
+! TODO: Add taskloop simd once supported by lowering.
+
+! ------------------------------------------------------------------------------
+! COMBINED CONSTRUCTS
+! ------------------------------------------------------------------------------
+
+subroutine masked_taskloop()
+  implicit none
+  integer :: i
+
+  !$omp masked taskloop
+  do i=1, 10
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmasked_taskloop
+! CHECK:         omp.masked
+! CHECK:           omp.taskloop.context
+! CHECK:             omp.taskloop.wrapper
+! CHECK-NEXT:          omp.loop_nest
+! CHECK:                 omp.yield
+! CHECK-NEXT:          }
+! CHECK-NEXT:        }
+! CHECK-NOT:         omp.combined
+! CHECK:             omp.terminator
+! CHECK-NEXT:      } {{{.*}}omp.combined{{.*}}}
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine master_taskloop()
+  implicit none
+  integer :: i
+
+  !$omp master taskloop
+  do i=1, 10
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPmaster_taskloop
+! CHECK:         omp.master
+! CHECK:           omp.taskloop.context
+! CHECK:             omp.taskloop.wrapper
+! CHECK-NEXT:          omp.loop_nest
+! CHECK:                 omp.yield
+! CHECK-NEXT:          }
+! CHECK-NEXT:        }
+! CHECK-NOT:         omp.combined
+! CHECK:             omp.terminator
+! CHECK-NEXT:      } {{{.*}}omp.combined{{.*}}}
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine parallel_do()
+  implicit none
+  integer :: i
+
+  !$omp parallel do
+  do i=1, 10
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPparallel_do
+! CHECK:         omp.parallel
+! CHECK:           omp.wsloop
+! CHECK-NEXT:        omp.loop_nest
+! CHECK:               omp.yield
+! CHECK-NEXT:        }
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine parallel_loop()
+  implicit none
+  integer :: i
+
+  !$omp parallel loop
+  do i=1, 10
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPparallel_loop
+! CHECK:         omp.parallel
+! CHECK:           omp.wsloop
+! CHECK-NEXT:        omp.loop_nest
+! CHECK:               omp.yield
+! CHECK-NEXT:        }
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine parallel_masked()
+  implicit none
+
+  !$omp parallel masked
+  call foo()
+  !$omp end parallel masked
+end subroutine
+
+! CHECK-LABEL: func.func @_QPparallel_masked
+! CHECK:         omp.parallel
+! CHECK:           omp.masked
+! CHECK:             omp.terminator
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine parallel_master()
+  implicit none
+
+  !$omp parallel master
+  call foo()
+  !$omp end parallel master
+end subroutine
+
+! CHECK-LABEL: func.func @_QPparallel_master
+! CHECK:         omp.parallel
+! CHECK:           omp.master
+! CHECK:             omp.terminator
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine parallel_sections()
+  implicit none
+
+  !$omp parallel sections
+  call foo()
+  !$omp end parallel sections
+end subroutine
+
+! CHECK-LABEL: func.func @_QPparallel_sections
+! CHECK:         omp.parallel
+! CHECK:           omp.sections
+! CHECK:             omp.section
+! CHECK:               omp.terminator
+! CHECK-NEXT:        }
+! CHECK:           }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine parallel_workshare()
+  implicit none
+  integer :: x(10)
+
+  !$omp parallel workshare
+  x = 1
+  !$omp end parallel workshare
+end subroutine
+
+! CHECK-LABEL: func.func @_QPparallel_workshare
+! CHECK:         omp.parallel
+! CHECK:           omp.workshare
+! CHECK:             omp.terminator
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine target_loop()
+  implicit none
+  integer :: i
+
+  !$omp target loop
+  do i=1, 10
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtarget_loop
+! CHECK:         omp.target
+! CHECK:           omp.simd
+! CHECK-NEXT:        omp.loop_nest
+! CHECK:               omp.yield
+! CHECK-NEXT:        }
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine target_parallel()
+  implicit none
+
+  !$omp target parallel
+  call foo()
+  !$omp end target parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtarget_parallel
+! CHECK:         omp.target
+! CHECK:           omp.parallel
+! CHECK:             omp.terminator
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine target_simd()
+  implicit none
+  integer :: i
+
+  !$omp target simd
+  do i=1, 10
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtarget_simd
+! CHECK:         omp.target
+! CHECK:           omp.simd
+! CHECK-NEXT:        omp.loop_nest
+! CHECK:               omp.yield
+! CHECK-NEXT:        }
+! CHECK-NEXT:      }
+! CHECK-NOT:       omp.combined
+! CHECK:           omp.terminator
+! CHECK-NEXT:    } {{{.*}}omp.combined{{.*}}}
+
+subroutine tar...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/198783