[clang] [llvm] [Clang] [OpenMP] Add support for '#pragma omp stripe'. (PR #119891)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 24 12:02:53 PST 2024
================
@@ -14477,6 +14484,279 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
buildPreInits(Context, PreInits));
}
+StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ ASTContext &Context = getASTContext();
+ Scope *CurScope = SemaRef.getCurScope();
+
+ const auto *SizesClause =
+ OMPExecutableDirective::getSingleClause<OMPSizesClause>(Clauses);
+ if (!SizesClause ||
+ llvm::any_of(SizesClause->getSizesRefs(), [](Expr *E) { return !E; }))
+ return StmtError();
+ unsigned NumLoops = SizesClause->getNumSizes();
+
+ // Empty statement should only be possible if there already was an error.
+ if (!AStmt)
+ return StmtError();
+
+ // Verify and diagnose loop nest.
+ SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
+ Stmt *Body = nullptr;
+ SmallVector<SmallVector<Stmt *, 0>, 4> OriginalInits;
+ if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers,
+ Body, OriginalInits))
+ return StmtError();
+
+ // Delay tiling to when template is completely instantiated.
+ if (SemaRef.CurContext->isDependentContext())
+ return OMPStripeDirective::Create(Context, StartLoc, EndLoc, Clauses,
+ NumLoops, AStmt, nullptr, nullptr);
+
+ assert(LoopHelpers.size() == NumLoops &&
+ "Expecting loop iteration space dimensionality to match number of "
+ "affected loops");
+ assert(OriginalInits.size() == NumLoops &&
+ "Expecting loop iteration space dimensionality to match number of "
+ "affected loops");
+
+ // Collect all affected loop statements.
+ SmallVector<Stmt *> LoopStmts(NumLoops, nullptr);
+ collectLoopStmts(AStmt, LoopStmts);
+
+ SmallVector<Stmt *, 4> PreInits;
+ CaptureVars CopyTransformer(SemaRef);
+
+ // Create iteration variables for the generated loops.
+ SmallVector<VarDecl *, 4> FloorIndVars;
+ SmallVector<VarDecl *, 4> StripeIndVars;
+ FloorIndVars.resize(NumLoops);
+ StripeIndVars.resize(NumLoops);
+ for (unsigned I = 0; I < NumLoops; ++I) {
+ OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I];
+
+ assert(LoopHelper.Counters.size() == 1 &&
+ "Expect single-dimensional loop iteration space");
+ auto *OrigCntVar = cast<DeclRefExpr>(LoopHelper.Counters.front());
+ std::string OrigVarName = OrigCntVar->getNameInfo().getAsString();
+ DeclRefExpr *IterVarRef = cast<DeclRefExpr>(LoopHelper.IterationVarRef);
+ QualType CntTy = IterVarRef->getType();
+
+ // Iteration variable for the floor (i.e. outer) loop.
+ {
+ std::string FloorCntName =
+ (Twine(".floor_") + llvm::utostr(I) + ".iv." + OrigVarName).str();
+ VarDecl *FloorCntDecl =
+ buildVarDecl(SemaRef, {}, CntTy, FloorCntName, nullptr, OrigCntVar);
+ FloorIndVars[I] = FloorCntDecl;
+ }
+
+ // Iteration variable for the stripe (i.e. inner) loop.
+ {
+ std::string StripeCntName =
+ (Twine(".stripe_") + llvm::utostr(I) + ".iv." + OrigVarName).str();
+
+ // Reuse the iteration variable created by checkOpenMPLoop. It is also
+ // used by the expressions to derive the original iteration variable's
+ // value from the logical iteration number.
+ auto *StripeCntDecl = cast<VarDecl>(IterVarRef->getDecl());
+ StripeCntDecl->setDeclName(
+ &SemaRef.PP.getIdentifierTable().get(StripeCntName));
+ StripeIndVars[I] = StripeCntDecl;
+ }
+
+ addLoopPreInits(Context, LoopHelper, LoopStmts[I], OriginalInits[I],
+ PreInits);
+ }
+
+ // Once the original iteration values are set, append the innermost body.
+ Stmt *Inner = Body;
+
+ auto MakeDimStripeSize = [&SemaRef = this->SemaRef, &CopyTransformer,
+ &Context, SizesClause, CurScope](int I) -> Expr * {
+ Expr *DimStripeSizeExpr = SizesClause->getSizesRefs()[I];
+ if (isa<ConstantExpr>(DimStripeSizeExpr))
+ return AssertSuccess(CopyTransformer.TransformExpr(DimStripeSizeExpr));
+
+ // When the stripe size is not a constant but a variable, it is possible to
+ // pass non-positive numbers. For instance:
+ // \code{c}
+ // int a = 0;
+ // #pragma omp stripe sizes(a)
+ // for (int i = 0; i < 42; ++i)
+ // body(i);
+ // \endcode
+ // Although there is no meaningful interpretation of the stripe size, the
+ // body should still be executed 42 times to avoid surprises. To preserve
+ // the invariant that every loop iteration is executed exactly once and not
+ // cause an infinite loop, apply a minimum stripe size of one.
+ // Build expr:
+ // \code{c}
+ // (TS <= 0) ? 1 : TS
+ // \endcode
+ QualType DimTy = DimStripeSizeExpr->getType();
+ uint64_t DimWidth = Context.getTypeSize(DimTy);
+ IntegerLiteral *Zero = IntegerLiteral::Create(
+ Context, llvm::APInt::getZero(DimWidth), DimTy, {});
+ IntegerLiteral *One =
+ IntegerLiteral::Create(Context, llvm::APInt(DimWidth, 1), DimTy, {});
+ Expr *Cond = AssertSuccess(SemaRef.BuildBinOp(
+ CurScope, {}, BO_LE,
+ AssertSuccess(CopyTransformer.TransformExpr(DimStripeSizeExpr)), Zero));
+ Expr *MinOne = new (Context) ConditionalOperator(
+ Cond, {}, One, {},
+ AssertSuccess(CopyTransformer.TransformExpr(DimStripeSizeExpr)), DimTy,
+ VK_PRValue, OK_Ordinary);
+ return MinOne;
+ };
+
+ // Create stripe loops from the inside to the outside.
+ for (int I = NumLoops - 1; I >= 0; --I) {
+ OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I];
+ Expr *NumIterations = LoopHelper.NumIterations;
+ auto *OrigCntVar = cast<DeclRefExpr>(LoopHelper.Counters[0]);
+ QualType IVTy = NumIterations->getType();
+ Stmt *LoopStmt = LoopStmts[I];
+
+ // Commonly used variables. One of the constraints of an AST is that every
+ // node object must appear at most once, hence we define lambdas that create
+ // a new AST node at every use.
+ auto MakeStripeIVRef = [&SemaRef = this->SemaRef, &StripeIndVars, I, IVTy,
+ OrigCntVar]() {
+ return buildDeclRefExpr(SemaRef, StripeIndVars[I], IVTy,
+ OrigCntVar->getExprLoc());
+ };
+ auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy,
+ OrigCntVar]() {
+ return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy,
+ OrigCntVar->getExprLoc());
+ };
+
+ // For init-statement: auto .stripe.iv = .floor.iv
+ SemaRef.AddInitializerToDecl(
+ StripeIndVars[I],
+ SemaRef.DefaultLvalueConversion(MakeFloorIVRef()).get(),
+ /*DirectInit=*/false);
+ Decl *CounterDecl = StripeIndVars[I];
+ StmtResult InitStmt = new (Context)
+ DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1),
+ OrigCntVar->getBeginLoc(), OrigCntVar->getEndLoc());
+ if (!InitStmt.isUsable())
+ return StmtError();
+
+ // For cond-expression:
+ // .stripe.iv < min(.floor.iv + DimStripeSize, NumIterations)
+ ExprResult EndOfStripe =
+ SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_Add,
+ MakeFloorIVRef(), MakeDimStripeSize(I));
+ if (!EndOfStripe.isUsable())
+ return StmtError();
+ ExprResult IsPartialStripe =
+ SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
+ NumIterations, EndOfStripe.get());
+ if (!IsPartialStripe.isUsable())
+ return StmtError();
+ ExprResult MinStripeAndIterSpace = SemaRef.ActOnConditionalOp(
+ LoopHelper.Cond->getBeginLoc(), LoopHelper.Cond->getEndLoc(),
+ IsPartialStripe.get(), NumIterations, EndOfStripe.get());
+ if (!MinStripeAndIterSpace.isUsable())
+ return StmtError();
+ ExprResult CondExpr =
+ SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
+ MakeStripeIVRef(), MinStripeAndIterSpace.get());
+ if (!CondExpr.isUsable())
+ return StmtError();
+
+ // For incr-statement: ++.stripe.iv
+ ExprResult IncrStmt = SemaRef.BuildUnaryOp(
+ CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeStripeIVRef());
+ if (!IncrStmt.isUsable())
+ return StmtError();
+
+ // Statements to set the original iteration variable's value from the
+ // logical iteration number.
+ // Generated for loop is:
+ // \code
+ // Original_for_init;
+ // for (auto .stripe.iv = .floor.iv;
+ // .stripe.iv < min(.floor.iv + DimStripeSize, NumIterations);
+ // ++.stripe.iv) {
+ // Original_Body;
+ // Original_counter_update;
+ // }
+ // \endcode
+ // FIXME: If the innermost body is a loop itself, inserting these
+ // statements stops it being recognized as a perfectly nested loop (e.g.
+ // for applying tiling again). If this is the case, sink the expressions
+ // further into the inner loop.
+ SmallVector<Stmt *, 4> BodyParts;
+ BodyParts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end());
+ if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
+ BodyParts.push_back(SourceCXXFor->getLoopVarStmt());
+ BodyParts.push_back(Inner);
+ Inner = CompoundStmt::Create(Context, BodyParts, FPOptionsOverride(),
+ Inner->getBeginLoc(), Inner->getEndLoc());
+ Inner = new (Context)
+ ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr,
+ IncrStmt.get(), Inner, LoopHelper.Init->getBeginLoc(),
+ LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+ }
+
+ // Create floor loops from the inside to the outside.
+ for (int I = NumLoops - 1; I >= 0; --I) {
+ auto &LoopHelper = LoopHelpers[I];
+ Expr *NumIterations = LoopHelper.NumIterations;
+ DeclRefExpr *OrigCntVar = cast<DeclRefExpr>(LoopHelper.Counters[0]);
+ QualType IVTy = NumIterations->getType();
+
+ // Commonly used variables. One of the constraints of an AST is that every
+ // node object must appear at most once, hence we define lambdas that create
+ // a new AST node at every use.
+ auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy,
+ OrigCntVar]() {
----------------
alexey-bataev wrote:
There are lots of similar lambdas with the same functionality; maybe create just a single one and reuse it.
https://github.com/llvm/llvm-project/pull/119891
More information about the llvm-commits
mailing list