[clang] 1462e63 - [OPENMP]PR53344: Emit code for final update of the inscan reduction vars in worksharing loops.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 28 10:42:24 PDT 2022


Author: Alexey Bataev
Date: 2022-04-28T10:41:28-07:00
New Revision: 1462e63f67a965defec035c3bc17a5ddeb366964

URL: https://github.com/llvm/llvm-project/commit/1462e63f67a965defec035c3bc17a5ddeb366964
DIFF: https://github.com/llvm/llvm-project/commit/1462e63f67a965defec035c3bc17a5ddeb366964.diff

LOG: [OPENMP]PR53344: Emit code for final update of the inscan reduction vars in worksharing loops.

Need to emit final update of the inscan reduction variables. For
worksharing loops, the reduction values are stored in the temp array,
need to copy the last element to the original var at the end of the
construct.

Differential Revision: https://reviews.llvm.org/D121156

Added: 
    

Modified: 
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/test/OpenMP/parallel_for_scan_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 4d8b0afed6e92..d6f81b33263f8 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3515,6 +3515,57 @@ static void emitScanBasedDirectiveDecls(
   }
 }
 
+/// Copies final inscan reductions values to the original variables.
+/// The code is the following:
+/// \code
+/// <orig_var> = buffer[num_iters-1];
+/// \endcode
+static void emitScanBasedDirectiveFinals(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
+  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+  SmallVector<const Expr *, 4> Shareds;
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
+  SmallVector<const Expr *, 4> Privates;
+  SmallVector<const Expr *, 4> CopyOps;
+  SmallVector<const Expr *, 4> CopyArrayElems;
+  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+           "Only inscan reductions are expected.");
+    Shareds.append(C->varlist_begin(), C->varlist_end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    Privates.append(C->privates().begin(), C->privates().end());
+    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+    CopyArrayElems.append(C->copy_array_elems().begin(),
+                          C->copy_array_elems().end());
+  }
+  // Create temp var and copy LHS value to this temp value.
+  // LHS = TMP[LastIter];
+  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
+      OMPScanNumIterations,
+      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
+  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+    const Expr *PrivateExpr = Privates[I];
+    const Expr *OrigExpr = Shareds[I];
+    const Expr *CopyArrayElem = CopyArrayElems[I];
+    CodeGenFunction::OpaqueValueMapping IdxMapping(
+        CGF,
+        cast<OpaqueValueExpr>(
+            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+        RValue::get(OMPLast));
+    LValue DestLVal = CGF.EmitLValue(OrigExpr);
+    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
+    CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
+                    SrcLVal.getAddress(CGF),
+                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                    CopyOps[I]);
+  }
+}
+
 /// Emits the code for the directive with inscan reductions.
 /// The code is the following:
 /// \code
@@ -3709,6 +3760,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
       emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
+    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
+      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
   } else {
     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                      HasCancel);
@@ -4282,23 +4335,25 @@ void CodeGenFunction::EmitOMPParallelForDirective(
     (void)emitWorksharingDirective(CGF, S, S.hasCancel());
   };
   {
-    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+      CGCapturedStmtInfo CGSI(CR_OpenMP);
+      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+      OMPLoopScope LoopScope(CGF, S);
+      return CGF.EmitScalarExpr(S.getNumIterations());
+    };
+    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                      [](const OMPReductionClause *C) {
                        return C->getModifier() == OMPC_REDUCTION_inscan;
-                     })) {
-      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
-        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
-        CGCapturedStmtInfo CGSI(CR_OpenMP);
-        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
-        OMPLoopScope LoopScope(CGF, S);
-        return CGF.EmitScalarExpr(S.getNumIterations());
-      };
+                     });
+    if (IsInscan)
       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
-    }
     auto LPCRegion =
         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                    emitEmptyBoundParameters);
+    if (IsInscan)
+      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
   }
   // Check for outer lastprivate conditional update.
   checkForLastprivateConditionalUpdate(*this, S);
@@ -4313,23 +4368,25 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
   };
   {
-    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+      CGCapturedStmtInfo CGSI(CR_OpenMP);
+      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+      OMPLoopScope LoopScope(CGF, S);
+      return CGF.EmitScalarExpr(S.getNumIterations());
+    };
+    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                      [](const OMPReductionClause *C) {
                        return C->getModifier() == OMPC_REDUCTION_inscan;
-                     })) {
-      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
-        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
-        CGCapturedStmtInfo CGSI(CR_OpenMP);
-        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
-        OMPLoopScope LoopScope(CGF, S);
-        return CGF.EmitScalarExpr(S.getNumIterations());
-      };
+                     });
+    if (IsInscan)
       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
-    }
     auto LPCRegion =
         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                    emitEmptyBoundParameters);
+    if (IsInscan)
+      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
   }
   // Check for outer lastprivate conditional update.
   checkForLastprivateConditionalUpdate(*this, S);

diff  --git a/clang/test/OpenMP/parallel_for_scan_codegen.cpp b/clang/test/OpenMP/parallel_for_scan_codegen.cpp
index 975d7c3d4e4b0..21d4a4467a615 100644
--- a/clang/test/OpenMP/parallel_for_scan_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_scan_codegen.cpp
@@ -27,6 +27,13 @@ void baz(int n) {
   // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
 
   // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+  // CHECK: [[LAST:%.+]] = mul nsw i64 9, %
+  // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]]
+  // CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false)
+  // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9
+  // CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]],
+  // CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b,
 
   // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
 


        


More information about the cfe-commits mailing list