[Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293)

Alexey Bataev via Openmp-commits openmp-commits at lists.llvm.org
Fri May 9 11:12:06 PDT 2025


================
@@ -15451,6 +15819,500 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective(
                                          buildPreInits(Context, PreInits));
 }
 
+StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef<OMPClause *> Clauses,
+                                                Stmt *AStmt,
+                                                SourceLocation StartLoc,
+                                                SourceLocation EndLoc) {
+
+  ASTContext &Context = getASTContext();
+  DeclContext *CurrContext = SemaRef.CurContext;
+  Scope *CurScope = SemaRef.getCurScope();
+  CaptureVars CopyTransformer(SemaRef);
+
+  // Ensure the structured block is not empty
+  if (!AStmt) {
+    return StmtError();
+  }
+
+  unsigned NumLoops = 1;
+  unsigned LoopSeqSize = 1;
+
+  // Defer transformation in dependent contexts
+  // The NumLoopNests argument is set to a placeholder 1 (even though
+  // using looprange fuse could yield up to 3 top level loop nests)
+  // because a dependent context could prevent determining its true value
+  if (CurrContext->isDependentContext()) {
+    return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses,
+                                    NumLoops, LoopSeqSize, AStmt, nullptr,
+                                    nullptr);
+  }
+
+  // Validate that the potential loop sequence is transformable for fusion
+  // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops
+  SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers;
+  SmallVector<Stmt *> LoopStmts;
+  SmallVector<SmallVector<Stmt *, 0>> OriginalInits;
+  SmallVector<SmallVector<Stmt *, 0>> TransformsPreInits;
+  SmallVector<SmallVector<Stmt *, 0>> LoopSequencePreInits;
+  SmallVector<OMPLoopCategory, 0> LoopCategories;
+  if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops,
+                                      LoopHelpers, LoopStmts, OriginalInits,
+                                      TransformsPreInits, LoopSequencePreInits,
+                                      LoopCategories, Context)) {
+    return StmtError();
+  }
+
+  // Handle clauses, which can be any of the following: [looprange, apply]
+  const OMPLoopRangeClause *LRC =
+      OMPExecutableDirective::getSingleClause<OMPLoopRangeClause>(Clauses);
+
+  // The clause arguments are invalidated if any error arises
+  // such as non-constant or non-positive arguments
+  if (LRC && (!LRC->getFirst() || !LRC->getCount()))
+    return StmtError();
+
+  // Delayed semantic check of LoopRange constraint
+  // Evaluates the loop range arguments and returns the first and count values
+  auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count,
+                                               uint64_t &FirstVal,
+                                               uint64_t &CountVal) {
+    llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context);
+    llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context);
+    FirstVal = FirstInt.getZExtValue();
+    CountVal = CountInt.getZExtValue();
+  };
+
+  // OpenMP [6.0, Restrictions]
+  // first + count - 1 must not evaluate to a value greater than the
+  // loop sequence length of the associated canonical loop sequence.
+  auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal,
+                           unsigned NumLoops) -> bool {
+    return FirstVal + CountVal - 1 <= NumLoops;
+  };
+  uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize;
+
+  // Validates the loop range after evaluating the semantic information
+  // and ensures that the range is valid for the given loop sequence size.
+  // Expressions are evaluated at compile time to obtain constant values.
+  if (LRC) {
+    EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal,
+                               CountVal);
+    if (CountVal == 1)
+      SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion)
+          << getOpenMPDirectiveName(OMPD_fuse);
+
+    if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) {
+      SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange)
+          << getOpenMPDirectiveName(OMPD_fuse) << (FirstVal + CountVal - 1)
+          << LoopSeqSize;
+      return StmtError();
+    }
+
+    LastVal = FirstVal + CountVal - 1;
+  }
+
+  // Complete fusion generates a single canonical loop nest
+  // However looprange clause generates several loop nests
+  unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1;
+
+  // Emit a warning for redundant loop fusion when the sequence contains only
+  // one loop.
+  if (LoopSeqSize == 1)
+    SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion)
+        << getOpenMPDirectiveName(OMPD_fuse);
+
+  assert(LoopHelpers.size() == LoopSeqSize &&
+         "Expecting loop iteration space dimensionality to match number of "
+         "affected loops");
+  assert(OriginalInits.size() == LoopSeqSize &&
+         "Expecting loop iteration space dimensionality to match number of "
+         "affected loops");
+
+  // Select the type with the largest bit width among all induction variables
+  QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType();
+  for (unsigned int I = FirstVal; I < LastVal; ++I) {
+    QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType();
+    if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) {
+      IVType = CurrentIVType;
+    }
+  }
+  uint64_t IVBitWidth = Context.getIntWidth(IVType);
+
+  // Create pre-init declarations for all loops lower bounds, upper bounds,
+  // strides and num-iterations for every top level loop in the fusion
+  SmallVector<VarDecl *, 4> LBVarDecls;
+  SmallVector<VarDecl *, 4> STVarDecls;
+  SmallVector<VarDecl *, 4> NIVarDecls;
+  SmallVector<VarDecl *, 4> UBVarDecls;
+  SmallVector<VarDecl *, 4> IVVarDecls;
+
+  // Helper lambda to create variables for bounds, strides, and other
+  // expressions. Generates both the variable declaration and the corresponding
+  // initialization statement.
+  auto CreateHelperVarAndStmt =
+      [&SemaRef = this->SemaRef, &Context, &CopyTransformer,
+       &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I,
----------------
alexey-bataev wrote:

```suggestion
      [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName, unsigned I,
```


https://github.com/llvm/llvm-project/pull/139293


More information about the Openmp-commits mailing list