[clang] 10c7b9f - [OPENMP]Fix PR49115: Incorrect results for scan directive.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 16 06:26:42 PDT 2021
Author: Alexey Bataev
Date: 2021-04-16T06:25:35-07:00
New Revision: 10c7b9f64fa6c0257fe8a7f89123afb5e463ebda
URL: https://github.com/llvm/llvm-project/commit/10c7b9f64fa6c0257fe8a7f89123afb5e463ebda
DIFF: https://github.com/llvm/llvm-project/commit/10c7b9f64fa6c0257fe8a7f89123afb5e463ebda.diff
LOG: [OPENMP]Fix PR49115: Incorrect results for scan directive.
For combined worksharing directives, we need to emit the temp arrays outside
of the parallel region and update them in the master thread only.
Differential Revision: https://reviews.llvm.org/D100187
Added:
Modified:
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/lib/Sema/SemaOpenMP.cpp
clang/test/OpenMP/parallel_for_scan_codegen.cpp
clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 827102ba7d7a2..7d8744651a4ee 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -241,11 +241,22 @@ class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
if (const Expr *E = TG->getReductionRef())
CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
}
+ // Temp copy arrays for inscan reductions should not be emitted as they are
+ // not used in simd only mode.
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ if (C->getModifier() != OMPC_REDUCTION_inscan)
+ continue;
+ for (const Expr *E : C->copy_array_temps())
+ CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
+ }
const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
while (CS) {
for (auto &C : CS->captures()) {
if (C.capturesVariable() || C.capturesVariableByCopy()) {
auto *VD = C.getCapturedVar();
+ if (CopyArrayTemps.contains(VD))
+ continue;
assert(VD == VD->getCanonicalDecl() &&
"Canonical decl must be captured.");
DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
@@ -3295,53 +3306,30 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
return {LBVal, UBVal};
}
-/// Emits the code for the directive with inscan reductions.
+/// Emits internal temp array declarations for the directive with inscan
+/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
-/// #pragma omp ...
-/// for (i: 0..<num_iters>) {
-/// <input phase>;
-/// buffer[i] = red;
-/// }
-/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
-/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
-/// buffer[i] op= buffer[i-pow(2,k)];
-/// #pragma omp ...
-/// for (0..<num_iters>) {
-/// red = InclusiveScan ? buffer[i] : buffer[i-1];
-/// <scan phase>;
-/// }
/// \endcode
-static void emitScanBasedDirective(
+static void emitScanBasedDirectiveDecls(
CodeGenFunction &CGF, const OMPLoopDirective &S,
- llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
- llvm::function_ref<void(CodeGenFunction &)> FirstGen,
- llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
+ llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
SmallVector<const Expr *, 4> Shareds;
SmallVector<const Expr *, 4> Privates;
SmallVector<const Expr *, 4> ReductionOps;
- SmallVector<const Expr *, 4> LHSs;
- SmallVector<const Expr *, 4> RHSs;
- SmallVector<const Expr *, 4> CopyOps;
SmallVector<const Expr *, 4> CopyArrayTemps;
- SmallVector<const Expr *, 4> CopyArrayElems;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
assert(C->getModifier() == OMPC_REDUCTION_inscan &&
"Only inscan reductions are expected.");
Shareds.append(C->varlist_begin(), C->varlist_end());
Privates.append(C->privates().begin(), C->privates().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
- LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
- RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
- CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
CopyArrayTemps.append(C->copy_array_temps().begin(),
C->copy_array_temps().end());
- CopyArrayElems.append(C->copy_array_elems().begin(),
- C->copy_array_elems().end());
}
{
// Emit buffers for each reduction variables.
@@ -3370,6 +3358,49 @@ static void emitScanBasedDirective(
++Count;
}
}
+}
+
+/// Emits the code for the directive with inscan reductions.
+/// The code is the following:
+/// \code
+/// #pragma omp ...
+/// for (i: 0..<num_iters>) {
+/// <input phase>;
+/// buffer[i] = red;
+/// }
+/// #pragma omp master // in parallel region
+/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
+/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
+/// buffer[i] op= buffer[i-pow(2,k)];
+/// #pragma omp barrier // in parallel region
+/// #pragma omp ...
+/// for (0..<num_iters>) {
+/// red = InclusiveScan ? buffer[i] : buffer[i-1];
+/// <scan phase>;
+/// }
+/// \endcode
+static void emitScanBasedDirective(
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
+ llvm::function_ref<void(CodeGenFunction &)> FirstGen,
+ llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
+ llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+ NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ SmallVector<const Expr *, 4> CopyArrayElems;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+ "Only inscan reductions are expected.");
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ CopyArrayElems.append(C->copy_array_elems().begin(),
+ C->copy_array_elems().end());
+ }
CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
{
// Emit loop with input phase:
@@ -3382,90 +3413,108 @@ static void emitScanBasedDirective(
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
FirstGen(CGF);
}
- // Emit prefix reduction:
- // for (int k = 0; k <= ceil(log2(n)); ++k)
- llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
- llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
- llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
- llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
- llvm::Value *Arg =
- CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
- llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
- F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
- LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
- LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
- llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
- OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
- auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
- CGF.EmitBlock(LoopBB);
- auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
- // size pow2k = 1;
- auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
- Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
- Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
- // for (size i = n - 1; i >= 2 ^ k; --i)
- // tmp[i] op= tmp[i-pow2k];
- llvm::BasicBlock *InnerLoopBB =
- CGF.createBasicBlock("omp.inner.log.scan.body");
- llvm::BasicBlock *InnerExitBB =
- CGF.createBasicBlock("omp.inner.log.scan.exit");
- llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
- CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
- CGF.EmitBlock(InnerLoopBB);
- auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
- IVal->addIncoming(NMin1, LoopBB);
- {
- CodeGenFunction::OMPPrivateScope PrivScope(CGF);
- auto *ILHS = LHSs.begin();
- auto *IRHS = RHSs.begin();
- for (const Expr *CopyArrayElem : CopyArrayElems) {
- const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- Address LHSAddr = Address::invalid();
- {
- CodeGenFunction::OpaqueValueMapping IdxMapping(
- CGF,
- cast<OpaqueValueExpr>(
- cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
- RValue::get(IVal));
- LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
- }
- PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
- Address RHSAddr = Address::invalid();
- {
- llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
- CodeGenFunction::OpaqueValueMapping IdxMapping(
- CGF,
- cast<OpaqueValueExpr>(
- cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
- RValue::get(OffsetIVal));
- RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ // #pragma omp barrier // in parallel region
+ auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
+ &ReductionOps,
+ &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ // Emit prefix reduction:
+ // #pragma omp master // in parallel region
+ // for (int k = 0; k <= ceil(log2(n)); ++k)
+ llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
+ llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
+ llvm::Function *F =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
+ llvm::Value *Arg =
+ CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
+ llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
+ F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
+ LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
+ LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
+ llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
+ OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
+ CGF.EmitBlock(LoopBB);
+ auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
+ // size pow2k = 1;
+ auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
+ Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
+ // for (size i = n - 1; i >= 2 ^ k; --i)
+ // tmp[i] op= tmp[i-pow2k];
+ llvm::BasicBlock *InnerLoopBB =
+ CGF.createBasicBlock("omp.inner.log.scan.body");
+ llvm::BasicBlock *InnerExitBB =
+ CGF.createBasicBlock("omp.inner.log.scan.exit");
+ llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerLoopBB);
+ auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ IVal->addIncoming(NMin1, LoopBB);
+ {
+ CodeGenFunction::OMPPrivateScope PrivScope(CGF);
+ auto *ILHS = LHSs.begin();
+ auto *IRHS = RHSs.begin();
+ for (const Expr *CopyArrayElem : CopyArrayElems) {
+ const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ Address LHSAddr = Address::invalid();
+ {
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IVal));
+ LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
+ Address RHSAddr = Address::invalid();
+ {
+ llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(OffsetIVal));
+ RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
+ ++ILHS;
+ ++IRHS;
}
- PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
- ++ILHS;
- ++IRHS;
+ PrivScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitReduction(
+ CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+ {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
}
- PrivScope.Privatize();
- CGF.CGM.getOpenMPRuntime().emitReduction(
- CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
- {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
- }
- llvm::Value *NextIVal =
- CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
- IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
- CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
- CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
- CGF.EmitBlock(InnerExitBB);
- llvm::Value *Next =
- CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
- Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
- // pow2k <<= 1;
- llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
- Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
- llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
- CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
- auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
- CGF.EmitBlock(ExitBB);
+ llvm::Value *NextIVal =
+ CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
+ CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerExitBB);
+ llvm::Value *Next =
+ CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
+ Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
+ // pow2k <<= 1;
+ llvm::Value *NextPow2K =
+ CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+ Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
+ llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
+ CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+ auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
+ CGF.EmitBlock(ExitBB);
+ };
+ if (isOpenMPParallelDirective(S.getDirectiveKind())) {
+ CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(
+ CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ } else {
+ RegionCodeGenTy RCG(CodeGen);
+ RCG(CGF);
+ }
CGF.OMPFirstScanLoop = false;
SecondGen(CGF);
@@ -3502,6 +3551,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
emitForLoopBounds,
emitDispatchForLoopBounds);
};
+ if (!isOpenMPParallelDirective(S.getDirectiveKind()))
+ emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
} else {
CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
@@ -3955,6 +4006,19 @@ void CodeGenFunction::EmitOMPParallelForDirective(
(void)emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
+ if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ })) {
+ const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ CGCapturedStmtInfo CGSI(CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+ OMPLoopScope LoopScope(CGF, S);
+ return CGF.EmitScalarExpr(S.getNumIterations());
+ };
+ emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
+ }
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
@@ -3973,6 +4037,19 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
(void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
{
+ if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ })) {
+ const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ CGCapturedStmtInfo CGSI(CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+ OMPLoopScope LoopScope(CGF, S);
+ return CGF.EmitScalarExpr(S.getNumIterations());
+ };
+ emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
+ }
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index ecab867964b91..2a45a095eb0fd 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4596,6 +4596,17 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
}
}
}
+ if (ThisCaptureRegion == OMPD_parallel) {
+ // Capture temp arrays for inscan reductions.
+ for (OMPClause *C : Clauses) {
+ if (auto *RC = dyn_cast<OMPReductionClause>(C)) {
+ if (RC->getModifier() != OMPC_REDUCTION_inscan)
+ continue;
+ for (Expr *E : RC->copy_array_temps())
+ MarkDeclarationsReferencedInExpr(E);
+ }
+ }
+ }
if (++CompletedRegions == CaptureRegions.size())
DSAStack->setBodyComplete();
SR = ActOnCapturedRegionEnd(SR.get());
diff --git a/clang/test/OpenMP/parallel_for_scan_codegen.cpp b/clang/test/OpenMP/parallel_for_scan_codegen.cpp
index d4d6208f43cc6..7e42d7ec49e06 100644
--- a/clang/test/OpenMP/parallel_for_scan_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_scan_codegen.cpp
@@ -10,7 +10,7 @@
#ifndef HEADER
#define HEADER
-void foo();
+void foo(int n);
void bar();
// CHECK: define{{.*}} void @{{.*}}baz{{.*}}(i32 %n)
@@ -18,10 +18,16 @@ void baz(int n) {
static float a[10];
static double b;
- // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+ // CHECK: call i8* @llvm.stacksave()
+ // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
+
+ // float a_buffer[10][n];
+ // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
+ // double b_buffer[10];
+ // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
+
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
- // CHECK: call i8* @llvm.stacksave()
// CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
// float a_buffer[10][n];
@@ -29,6 +35,9 @@ void baz(int n) {
// double b_buffer[10];
// CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
+ // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+ // CHECK: call void @llvm.stackrestore(i8*
+
#pragma omp parallel for reduction(inscan, +:a[:n], b)
for (int i = 0; i < 10; ++i) {
// CHECK: call void @__kmpc_for_static_init_4(
@@ -37,13 +46,13 @@ void baz(int n) {
// CHECK: store double 0.000000e+00, double* [[B_PRIV_ADDR:%.+]],
// CHECK: br label %[[DISPATCH:[^,]+]]
// CHECK: [[INPUT_PHASE:.+]]:
- // CHECK: call void @{{.+}}foo{{.+}}()
+ // CHECK: call void @{{.+}}foo{{.+}}(
// a_buffer[i][0..n] = a_priv[[0..n];
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
- // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
- // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
+ // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
+ // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
// CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
@@ -51,7 +60,7 @@ void baz(int n) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
// b_buffer[i] = b_priv;
- // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
+ // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
// CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
// CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
// CHECK: br label %[[LOOP_CONTINUE:.+]]
@@ -62,7 +71,7 @@ void baz(int n) {
// CHECK: call void @llvm.stackrestore(i8* %
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_barrier(
- foo();
+ foo(n);
#pragma omp scan inclusive(a[:n], b)
// CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
// CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
@@ -128,7 +137,7 @@ void baz(int n) {
// CHECK: br label %[[DISPATCH:[^,]+]]
// Skip the before scan body.
- // CHECK: call void @{{.+}}foo{{.+}}()
+ // CHECK: call void @{{.+}}foo{{.+}}(
// CHECK: [[EXIT_INSCAN:[^,]+]]:
// CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
@@ -158,17 +167,8 @@ void baz(int n) {
// CHECK: [[LOOP_CONTINUE]]:
// CHECK: call void @llvm.stackrestore(i8* %
// CHECK: call void @__kmpc_for_static_fini(
- // CHECK: call void @llvm.stackrestore(i8*
}
- // CHECK: call i8* @llvm.stacksave()
- // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
-
- // float a_buffer[10][n];
- // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
-
- // double b_buffer[10];
- // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
#pragma omp parallel for reduction(inscan, +:a[:n], b)
for (int i = 0; i < 10; ++i) {
// CHECK: call void @__kmpc_for_static_init_4(
@@ -178,15 +178,15 @@ void baz(int n) {
// CHECK: br label %[[DISPATCH:[^,]+]]
// Skip the before scan body.
- // CHECK: call void @{{.+}}foo{{.+}}()
+ // CHECK: call void @{{.+}}foo{{.+}}(
// CHECK: [[EXIT_INSCAN:[^,]+]]:
// a_buffer[i][0..n] = a_priv[[0..n];
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
- // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
- // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
+ // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
+ // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
// CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
@@ -194,7 +194,7 @@ void baz(int n) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
// b_buffer[i] = b_priv;
- // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
+ // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
// CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
// CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
// CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
@@ -210,7 +210,7 @@ void baz(int n) {
// CHECK: call void @llvm.stackrestore(i8* %
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_barrier(
- foo();
+ foo(n);
#pragma omp scan exclusive(a[:n], b)
// CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
// CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
@@ -276,7 +276,7 @@ void baz(int n) {
// CHECK: br label %[[DISPATCH:[^,]+]]
// CHECK: [[SCAN_PHASE:.+]]:
- // CHECK: call void @{{.+}}foo{{.+}}()
+ // CHECK: call void @{{.+}}foo{{.+}}(
// CHECK: br label %[[LOOP_CONTINUE:.+]]
// CHECK: [[DISPATCH]]:
@@ -305,7 +305,6 @@ void baz(int n) {
// CHECK: [[LOOP_CONTINUE]]:
// CHECK: call void @llvm.stackrestore(i8* %
// CHECK: call void @__kmpc_for_static_fini(
- // CHECK: call void @llvm.stackrestore(i8*
}
}
diff --git a/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
index e93c950fb84fe..71c6447c0ff95 100644
--- a/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
@@ -18,10 +18,15 @@ void baz(int n) {
static float a[10];
static double b;
- // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+ // CHECK: call i8* @llvm.stacksave()
+ // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
+
+ // float a_buffer[10][n];
+ // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
+ // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
+
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
- // CHECK: call i8* @llvm.stacksave()
// CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
// float a_buffer[10][n];
@@ -29,6 +34,9 @@ void baz(int n) {
// double b_buffer[10];
// CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
+ // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+ // CHECK: call void @llvm.stackrestore(i8*
+
#pragma omp parallel for simd reduction(inscan, +:a[:n], b)
for (int i = 0; i < 10; ++i) {
// CHECK: call void @__kmpc_for_static_init_4(
@@ -42,8 +50,8 @@ void baz(int n) {
// a_buffer[i][0..n] = a_priv[[0..n];
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
- // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
- // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
+ // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
+ // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
// CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
@@ -51,7 +59,7 @@ void baz(int n) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
// b_buffer[i] = b_priv;
- // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
+ // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
// CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
// CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
// CHECK: br label %[[LOOP_CONTINUE:.+]]
@@ -158,17 +166,8 @@ void baz(int n) {
// CHECK: [[LOOP_CONTINUE]]:
// CHECK: call void @llvm.stackrestore(i8* %
// CHECK: call void @__kmpc_for_static_fini(
- // CHECK: call void @llvm.stackrestore(i8*
}
- // CHECK: call i8* @llvm.stacksave()
- // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
-
- // float a_buffer[10][n];
- // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
-
- // double b_buffer[10];
- // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
#pragma omp parallel for simd reduction(inscan, +:a[:n], b)
for (int i = 0; i < 10; ++i) {
// CHECK: call void @__kmpc_for_static_init_4(
@@ -185,8 +184,8 @@ void baz(int n) {
// a_buffer[i][0..n] = a_priv[[0..n];
// CHECK: [[BASE_IDX_I:%.+]] = load i32, i32* [[IV_ADDR:%.+]],
// CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
- // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
- // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[IDX]]
+ // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
+ // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, float* [[A_BUF:%.+]], i64 [[IDX]]
// CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[A_PRIV_ADDR:%.+]], i64 0, i64 0
// CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
// CHECK: [[DEST:%.+]] = bitcast float* [[A_BUF_IDX]] to i8*
@@ -194,7 +193,7 @@ void baz(int n) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SRC]], i64 [[BYTES]], i1 false)
// b_buffer[i] = b_priv;
- // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 [[BASE_IDX]]
+ // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, double* [[B_BUF:%.+]], i64 [[BASE_IDX]]
// CHECK: [[B_PRIV:%.+]] = load double, double* [[B_PRIV_ADDR]],
// CHECK: store double [[B_PRIV]], double* [[B_BUF_IDX]],
// CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
@@ -305,7 +304,6 @@ void baz(int n) {
// CHECK: [[LOOP_CONTINUE]]:
// CHECK: call void @llvm.stackrestore(i8* %
// CHECK: call void @__kmpc_for_static_fini(
- // CHECK: call void @llvm.stackrestore(i8*
}
}
More information about the cfe-commits
mailing list