[clang] fb80e67 - [OPENMP50]Codegen for scan directive in simd loops.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 11 06:08:30 PDT 2020


Author: Alexey Bataev
Date: 2020-06-11T09:01:23-04:00
New Revision: fb80e67f10eea7177b0ff9c618c8231363b6f2fc

URL: https://github.com/llvm/llvm-project/commit/fb80e67f10eea7177b0ff9c618c8231363b6f2fc
DIFF: https://github.com/llvm/llvm-project/commit/fb80e67f10eea7177b0ff9c618c8231363b6f2fc.diff

LOG: [OPENMP50]Codegen for scan directive in simd loops.

Added codegen for scandirectives in simd loop. The codegen transforms
original code:

```
int x = 0;
 #pragma omp simd reduction(inscan, +: x)
for (..) {
  <first part>
  #pragma omp scan inclusive(x)
  <second part>
}
```
into
```
int x = 0;
for (..) {
  int x_priv = 0;
  <first part>
  x = x_priv + x;
  x_priv = x;
  <second part>
}
```
and
```
int x = 0;
 #pragma omp simd reduction(inscan, +: x)
for (..) {
  <first part>
  #pragma omp scan exclusive(x)
  <second part>
}
```
into
```
int x = 0;
for (..) {
  int x_priv = 0;
  <second part>
  int temp = x;
  x = x_priv + x;
  x_priv = temp;
  <first part>
}
```

Differential revision: https://reviews.llvm.org/D78232

Added: 
    clang/test/OpenMP/scan_codegen.cpp

Modified: 
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/lib/Sema/SemaOpenMP.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 664aeccfe7b9..a94cca758b22 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2075,6 +2075,15 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
   if (const auto *C = D.getSingleClause<OMPOrderClause>())
     if (C->getKind() == OMPC_ORDER_concurrent)
       LoopStack.setParallel(/*Enable=*/true);
+  if ((D.getDirectiveKind() == OMPD_simd ||
+       (getLangOpts().OpenMPSimd &&
+        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
+      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
+                   [](const OMPReductionClause *C) {
+                     return C->getModifier() == OMPC_REDUCTION_inscan;
+                   }))
+    // Disable parallel access in case of prefix sum.
+    LoopStack.setParallel(/*Enable=*/false);
 }
 
 void CodeGenFunction::EmitOMPSimdFinal(
@@ -2270,6 +2279,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
 }
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
+  OMPFirstScanLoop = true;
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     emitOMPSimdRegion(CGF, S, Action);
   };
@@ -4191,14 +4202,15 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
 }
 
 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
-  // Do not emit code for non-simd directives in simd-only mode.
-  if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan)
+  if (!OMPParentLoopDirectiveForScan)
     return;
   const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
+  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
   SmallVector<const Expr *, 4> Shareds;
   SmallVector<const Expr *, 4> Privates;
   SmallVector<const Expr *, 4> LHSs;
   SmallVector<const Expr *, 4> RHSs;
+  SmallVector<const Expr *, 4> ReductionOps;
   SmallVector<const Expr *, 4> CopyOps;
   SmallVector<const Expr *, 4> CopyArrayTemps;
   SmallVector<const Expr *, 4> CopyArrayElems;
@@ -4209,13 +4221,109 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
     Privates.append(C->privates().begin(), C->privates().end());
     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
     CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
     CopyArrayTemps.append(C->copy_array_temps().begin(),
                           C->copy_array_temps().end());
     CopyArrayElems.append(C->copy_array_elems().begin(),
                           C->copy_array_elems().end());
   }
-  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
+  if (ParentDir.getDirectiveKind() == OMPD_simd ||
+      (getLangOpts().OpenMPSimd &&
+       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
+    // For simd directive and simd-based directives in simd only mode, use the
+    // following codegen:
+    // int x = 0;
+    // #pragma omp simd reduction(inscan, +: x)
+    // for (..) {
+    //   <first part>
+    //   #pragma omp scan inclusive(x)
+    //   <second part>
+    //  }
+    // is transformed to:
+    // int x = 0;
+    // for (..) {
+    //   int x_priv = 0;
+    //   <first part>
+    //   x = x_priv + x;
+    //   x_priv = x;
+    //   <second part>
+    // }
+    // and
+    // int x = 0;
+    // #pragma omp simd reduction(inscan, +: x)
+    // for (..) {
+    //   <first part>
+    //   #pragma omp scan exclusive(x)
+    //   <second part>
+    // }
+    // to
+    // int x = 0;
+    // for (..) {
+    //   int x_priv = 0;
+    //   <second part>
+    //   int temp = x;
+    //   x = x_priv + x;
+    //   x_priv = temp;
+    //   <first part>
+    // }
+    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
+    EmitBranch(IsInclusive
+                   ? OMPScanReduce
+                   : BreakContinueStack.back().ContinueBlock.getBlock());
+    EmitBlock(OMPScanDispatch);
+    {
+      // New scope for correct construction/destruction of temp variables for
+      // exclusive scan.
+      LexicalScope Scope(*this, S.getSourceRange());
+      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
+      EmitBlock(OMPScanReduce);
+      if (!IsInclusive) {
+        // Create temp var and copy LHS value to this temp value.
+        // TMP = LHS;
+        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+          const Expr *PrivateExpr = Privates[I];
+          const Expr *TempExpr = CopyArrayTemps[I];
+          EmitAutoVarDecl(
+              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
+          LValue DestLVal = EmitLValue(TempExpr);
+          LValue SrcLVal = EmitLValue(LHSs[I]);
+          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+                      SrcLVal.getAddress(*this),
+                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                      CopyOps[I]);
+        }
+      }
+      CGM.getOpenMPRuntime().emitReduction(
+          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
+      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+        const Expr *PrivateExpr = Privates[I];
+        LValue DestLVal;
+        LValue SrcLVal;
+        if (IsInclusive) {
+          DestLVal = EmitLValue(RHSs[I]);
+          SrcLVal = EmitLValue(LHSs[I]);
+        } else {
+          const Expr *TempExpr = CopyArrayTemps[I];
+          DestLVal = EmitLValue(RHSs[I]);
+          SrcLVal = EmitLValue(TempExpr);
+        }
+        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+                    SrcLVal.getAddress(*this),
+                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                    CopyOps[I]);
+      }
+    }
+    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
+    OMPScanExitBlock = IsInclusive
+                           ? BreakContinueStack.back().ContinueBlock.getBlock()
+                           : OMPScanReduce;
+    EmitBlock(OMPAfterScanBlock);
+    return;
+  }
   if (!IsInclusive) {
     EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
     EmitBlock(OMPScanExitBlock);
@@ -6313,6 +6421,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
     }
     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
       (void)GlobalsScope.Privatize();
+      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
     } else {
       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {

diff  --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index c689280e0ccd..3235d1d1ebb6 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15150,24 +15150,37 @@ static bool actOnOMPReductionKindClause(
           S.ActOnFinishFullExpr(CopyOpRes.get(), /*DiscardedValue=*/true);
       if (!CopyOpRes.isUsable())
         continue;
-      // Build temp array for prefix sum.
-      auto *Dim = new (S.Context)
-          OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
-      QualType ArrayTy =
-          S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal,
-                                         /*IndexTypeQuals=*/0, {ELoc, ELoc});
-      VarDecl *TempArrayVD =
-          buildVarDecl(S, ELoc, ArrayTy, D->getName(),
-                       D->hasAttrs() ? &D->getAttrs() : nullptr);
-      // Add a constructor to the temp decl.
-      S.ActOnUninitializedDecl(TempArrayVD);
-      TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc);
-      TempArrayElem =
-          S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get());
-      auto *Idx = new (S.Context)
-          OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
-      TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(),
-                                                        ELoc, Idx, ELoc);
+      // For simd directive and simd-based directives in simd mode no need to
+      // construct temp array, need just a single temp element.
+      if (Stack->getCurrentDirective() == OMPD_simd ||
+          (S.getLangOpts().OpenMPSimd &&
+           isOpenMPSimdDirective(Stack->getCurrentDirective()))) {
+        VarDecl *TempArrayVD =
+            buildVarDecl(S, ELoc, PrivateTy, D->getName(),
+                         D->hasAttrs() ? &D->getAttrs() : nullptr);
+        // Add a constructor to the temp decl.
+        S.ActOnUninitializedDecl(TempArrayVD);
+        TempArrayRes = buildDeclRefExpr(S, TempArrayVD, PrivateTy, ELoc);
+      } else {
+        // Build temp array for prefix sum.
+        auto *Dim = new (S.Context)
+            OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
+        QualType ArrayTy =
+            S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal,
+                                           /*IndexTypeQuals=*/0, {ELoc, ELoc});
+        VarDecl *TempArrayVD =
+            buildVarDecl(S, ELoc, ArrayTy, D->getName(),
+                         D->hasAttrs() ? &D->getAttrs() : nullptr);
+        // Add a constructor to the temp decl.
+        S.ActOnUninitializedDecl(TempArrayVD);
+        TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc);
+        TempArrayElem =
+            S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get());
+        auto *Idx = new (S.Context)
+            OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
+        TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(),
+                                                          ELoc, Idx, ELoc);
+      }
     }
 
     // OpenMP [2.15.4.6, Restrictions, p.2]

diff  --git a/clang/test/OpenMP/scan_codegen.cpp b/clang/test/OpenMP/scan_codegen.cpp
new file mode 100644
index 000000000000..3dc9c232974e
--- /dev/null
+++ b/clang/test/OpenMP/scan_codegen.cpp
@@ -0,0 +1,277 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+//
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+void foo();
+void bar();
+
+// CHECK-LABEL: baz
+void baz() {
+  int a = 0;
+
+  // CHECK: store i32 0, i32* [[A_ADDR:%.+]],
+  // CHECK: store i32 0, i32* [[OMP_CNT:%.+]],
+  // CHECK: br label %[[OMP_HEADER:.+]]
+
+  // CHECK: [[OMP_HEADER]]:
+  // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
+  // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10
+  // CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]]
+#pragma omp simd reduction(inscan, + : a)
+  for (int i = 0; i < 10; ++i) {
+    // CHECK: [[OMP_BODY]]:
+
+    // i = OMP_CNT*1 + 0;
+    // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
+    // CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1
+    // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]]
+    // CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]],
+
+    // A_PRIV = 0;
+    // CHECK: store i32 0, i32* [[A_PRIV_ADDR:%.+]],
+
+    // goto DISPATCH;
+    // CHECK: br label %[[DISPATCH:[^,]+]]
+
+    // INPUT_PHASE:
+    // foo();
+    // goto REDUCE;
+    // CHECK: [[INPUT_PHASE:.+]]:
+    // CHECK: call void @{{.*}}foo{{.*}}()
+    // CHECK: br label %[[REDUCE:[^,]+]]
+    foo();
+
+    // DISPATCH:
+    // goto INPUT_PHASE;
+    // CHECK: [[DISPATCH]]:
+    // CHECK: br label %[[INPUT_PHASE]]
+
+    // REDUCE:
+    // A = A_PRIV + A;
+    // A_PRIV = A;
+    // goto SCAN_PHASE;
+    // CHECK: [[REDUCE]]:
+    // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]],
+    // CHECK: [[A_PRIV:%.+]] = load i32, i32* [[A_PRIV_ADDR]],
+    // CHECK: [[SUM:%.+]] = add nsw i32 [[A]], [[A_PRIV]]
+    // CHECK: store i32 [[SUM]], i32* [[A_ADDR]],
+    // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]],
+    // CHECK: store i32 [[A]], i32* [[A_PRIV_ADDR]],
+    // CHECK: br label %[[SCAN_PHASE:[^,]+]]
+#pragma omp scan inclusive(a)
+
+    // SCAN_PHASE:
+    // bar();
+    // goto CONTINUE;
+    // CHECK: [[SCAN_PHASE]]:
+    // CHECK: call void @{{.*}}bar{{.*}}()
+    // CHECK: br label %[[CONTINUE:[^,]+]]
+    bar();
+
+    // CHECK: [[CONTINUE]]:
+    // CHECK: br label %[[INC_BLOCK:[^,]+]]
+
+    // ++OMP_CNT;
+    // CHECK: [[INC_BLOCK]]:
+    // CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]],
+    // CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1
+    // CHECK: store i32 [[INC]], i32* [[OMP_CNT]],
+    // CHECK: br label %[[OMP_HEADER]]
+  }
+  // CHECK: [[OMP_END]]:
+}
+
+struct S {
+  int a;
+  S() {}
+  ~S() {}
+  S& operator+(const S&);
+  S& operator=(const S&);
+};
+
+// CHECK-LABEL: xyz
+void xyz() {
+  S s[2];
+
+  // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0
+  // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2
+  // CHECK: br label %[[ARRAY_INIT:.+]]
+  // CHECK: [[ARRAY_INIT]]:
+  // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %{{.+}} ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ]
+  // CHECK: call void [[CONSTR:@.+]](%struct.S* [[S_CUR]])
+  // CHECK: [[S_NEXT]] = getelementptr inbounds %struct.S, %struct.S* [[S_CUR]], i{{.+}} 1
+  // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]]
+  // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]]
+  // CHECK: [[DONE]]:
+  // CHECK: store i32 0, i32* [[OMP_CNT:%.+]],
+  // CHECK: br label %[[OMP_HEADER:.+]]
+
+  // CHECK: [[OMP_HEADER]]:
+  // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
+  // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10
+  // CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]]
+#pragma omp simd reduction(inscan, + : s)
+  for (int i = 0; i < 10; ++i) {
+    // CHECK: [[OMP_BODY]]:
+
+    // i = OMP_CNT*1 + 0;
+    // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
+    // CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1
+    // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]]
+    // CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]],
+
+    // S S_PRIV[2];
+    // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0
+    // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_BEGIN]], [[S_END]]
+    // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT:[^,]+]]
+    // CHECK: [[ARRAY_INIT]]:
+    // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %[[OMP_BODY]] ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ]
+    // CHECK: call void [[CONSTR]](%struct.S* [[S_CUR]])
+    // CHECK: [[S_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} 1
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]]
+    // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]]
+    // CHECK: [[DONE]]:
+    // CHECK: [[LHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_ADDR]] to %struct.S*
+    // CHECK: [[RHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_PRIV_ADDR]] to %struct.S*
+
+    // goto DISPATCH;
+    // CHECK: br label %[[DISPATCH:[^,]+]]
+
+    // SCAN_PHASE:
+    // foo();
+    // goto CONTINUE;
+    // CHECK: [[SCAN_PHASE:.+]]:
+    // CHECK: call void @{{.*}}foo{{.*}}()
+    // CHECK: br label %[[CONTINUE:[^,]+]]
+    foo();
+
+    // DISPATCH:
+    // goto INPUT_PHASE;
+    // CHECK: [[DISPATCH]]:
+    // CHECK: br label %[[INPUT_PHASE:[^,]+]]
+
+    // REDUCE:
+    // TEMP = S;
+    // S = S_PRIV + S;
+    // S_PRIV = TEMP;
+    // goto SCAN_PHASE;
+    // CHECK: [[REDUCE:.+]]:
+
+    // S TEMP[2];
+    // CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR:%.+]], i32 0, i32 0
+    // CHECK: [[TEMP_ARR_END:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2
+    // CHECK: br label %[[BODY:[^,]+]]
+    // CHECK: [[BODY]]:
+    // CHECK: [[CUR:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %[[REDUCE]] ], [ [[NEXT:%.+]], %[[BODY]] ]
+    // CHECK: call void [[CONSTR]](%struct.S* [[CUR]])
+    // CHECK: [[NEXT]] = getelementptr inbounds %struct.S, %struct.S* [[CUR]], i64 1
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[NEXT]], [[TEMP_ARR_END]]
+    // CHECK: br i1 [[IS_DONE]], label %[[EXIT:[^,]+]], label %[[BODY]]
+    // CHECK: [[EXIT]]:
+
+    // TEMP = S;
+    // CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR]], i32 0, i32 0
+    // CHECK: [[TEMP_ARR_END:%.+]] = getelementptr %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2
+    // CHECK: [[IS_EMPTY:%.+]] = icmp eq %struct.S* [[TEMP_ARR_BEG]], [[TEMP_ARR_END]]
+    // CHECK: br i1 [[IS_EMPTY]], label %[[EXIT:[^,]+]], label %[[BODY:[^,]+]]
+    // CHECK: [[BODY]]:
+    // CHECK: [[CUR_SRC:%.+]] = phi %struct.S* [ [[LHS_BEGIN]], %{{.+}} ], [ [[SRC_NEXT:%.+]], %[[BODY]] ]
+    // CHECK: [[CUR_DEST:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %{{.+}} ], [ [[DEST_NEXT:%.+]], %[[BODY]] ]
+    // CHECK: call {{.*}}%struct.S* [[S_COPY:@.+]](%struct.S* [[CUR_DEST]], %struct.S* {{.*}}[[CUR_SRC]])
+    // CHECK: [[DEST_NEXT:%.+]] = getelementptr %struct.S, %struct.S* [[CUR_DEST]], i32 1
+    // CHECK: [[SRC_NEXT:%.+]] = getelementptr %struct.S, %struct.S* [[CUR_SRC]], i32 1
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[TEMP_ARR_END]]
+    // CHECK: br i1 [[IS_DONE]], label %[[EXIT]], label %[[BODY]]
+    // CHECK: [[EXIT]]:
+
+    // S = S_PRIV + S;
+    // CHECK: [[LHS_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[LHS_BEGIN]], i{{.+}} 2
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[LHS_BEGIN]], [[LHS_END]]
+    // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY:[^,]+]]
+    // CHECK: [[ARRAY_REDUCE_COPY]]:
+    // CHECK: [[SRC_CUR:%.+]] = phi %struct.S* [ [[RHS_BEGIN]], %[[EXIT]] ], [ [[SRC_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ]
+    // CHECK: [[DEST_CUR:%.+]] = phi %struct.S* [ [[LHS_BEGIN]], %[[EXIT]] ], [ [[DEST_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ]
+    // CHECK: [[SUM:%.+]] = call {{.*}}%struct.S* @{{.+}}(%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SRC_CUR]])
+    // CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SUM]])
+    // CHECK: [[DEST_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[DEST_CUR]], i{{.+}} 1
+    // CHECK: [[SRC_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[SRC_CUR]], i{{.+}} 1
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[LHS_END]]
+    // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY]]
+    // CHECK: [[DONE]]:
+
+    // S_PRIV = TEMP;
+    // CHECK: [[TEMP_ARR_BEG:%.+]] = bitcast [2 x %struct.S]* [[TEMP_ARR]] to %struct.S*
+    // CHECK: [[RHS_END:%.+]] = getelementptr %struct.S, %struct.S* [[RHS_BEGIN]], i64 2
+    // CHECK: [[IS_EMPTY:%.+]] = icmp eq %struct.S* [[RHS_BEGIN]], [[RHS_END]]
+    // CHECK: br i1 [[IS_EMPTY]], label %[[EXIT:[^,]+]], label %[[BODY:[^,]+]]
+    // CHECK: [[BODY]]:
+    // CHECK: [[CUR_SRC:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %[[DONE]] ], [ [[SRC_NEXT:%.+]], %[[BODY]] ]
+    // CHECK: [[CUR_DEST:%.+]] = phi %struct.S* [ [[RHS_BEGIN]], %[[DONE]] ], [ [[DEST_NEXT:%.+]], %[[BODY]] ]
+    // CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[CUR_DEST]], %struct.S* {{.*}}[[CUR_SRC]])
+    // CHECK: [[DEST_NEXT]] = getelementptr %struct.S, %struct.S* [[CUR_DEST]], i32 1
+    // CHECK: [[SRC_NEXT]] = getelementptr %struct.S, %struct.S* [[CUR_SRC]], i32 1
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[RHS_END]]
+    // CHECK: br i1 [[IS_DONE]], label %[[DONE:[^,]+]], label %[[BODY]]
+    // CHECK: [[DONE]]:
+
+    // TEMP.~S()
+    // CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR]], i32 0, i32 0
+    // CHECK: [[TEMP_ARR_END:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2
+    // CHECK: br label %[[BODY:[^,]+]]
+    // CHECK: [[BODY]]:
+    // CHECK: [[CUR:%.+]] = phi %struct.S* [ [[TEMP_ARR_END]], %[[DONE]] ], [ [[PREV:%.+]], %[[BODY]] ]
+    // CHECK: [[PREV]] = getelementptr inbounds %struct.S, %struct.S* [[CUR]], i64 -1
+    // CHECK: call void [[DESTR:@.+]](%struct.S* [[PREV]])
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[PREV]], [[TEMP_ARR_BEG]]
+    // CHECK: br i1 [[IS_DONE]], label %[[EXIT:[^,]+]], label %[[BODY]]
+    // CHECK: [[EXIT]]:
+
+    // goto SCAN_PHASE;
+    // CHECK: br label %[[SCAN_PHASE]]
+#pragma omp scan exclusive(s)
+
+    // INPUT_PHASE:
+    // bar();
+    // goto REDUCE;
+    // CHECK: [[INPUT_PHASE]]:
+    // CHECK: call void @{{.*}}bar{{.*}}()
+    // CHECK: br label %[[REDUCE]]
+    bar();
+
+    // CHECK: [[CONTINUE]]:
+
+    // S_PRIV[2].~S();
+    // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR]], i{{.+}} 0, i{{.+}} 0
+    // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2
+    // CHECK: br label %[[ARRAY_DESTR:[^,]+]]
+    // CHECK: [[ARRAY_DESTR]]:
+    // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_END]], %[[CONTINUE]] ], [ [[S_PREV:%.+]], %[[ARRAY_DESTR]] ]
+    // CHECK: [[S_PREV]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} -1
+    // CHECK: call void [[DESTR]](%struct.S* [[S_PREV]])
+    // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_PREV]], [[S_BEGIN]]
+    // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_DESTR]]
+    // CHECK: [[DONE]]:
+    // CHECK: br label %[[INC_BLOCK:[^,]+]]
+
+    // ++OMP_CNT;
+    // CHECK: [[INC_BLOCK]]:
+    // CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]],
+    // CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1
+    // CHECK: store i32 [[INC]], i32* [[OMP_CNT]],
+    // CHECK: br label %[[OMP_HEADER]]
+  }
+  // CHECK: [[OMP_END]]:
+}
+
+// CHECK-NOT: !{!"llvm.loop.parallel_accesses"
+
+#endif // HEADER


        


More information about the cfe-commits mailing list