[clang] ebad64d - [OpenMP][FIX] Consistently use OpenMPIRBuilder if requested

Johannes Doerfert via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 30 08:21:38 PDT 2020


Author: Johannes Doerfert
Date: 2020-07-30T10:19:40-05:00
New Revision: ebad64dfe133e64d1df6b82e6ef2fb031d635b08

URL: https://github.com/llvm/llvm-project/commit/ebad64dfe133e64d1df6b82e6ef2fb031d635b08
DIFF: https://github.com/llvm/llvm-project/commit/ebad64dfe133e64d1df6b82e6ef2fb031d635b08.diff

LOG: [OpenMP][FIX] Consistently use OpenMPIRBuilder if requested

When we use the OpenMPIRBuilder for the parallel region we need to also
use it to get the thread ID (among other things) in the body. This is
because CGOpenMPRuntime::getThreadID() and
CGOpenMPRuntime::emitUpdateLocation implicitly assumes that if they are
called from within a parallel region there is a certain structure to the
code and certain members of the OMPRegionInfo are initialized. It might
make sense to initialize them even if we use the OpenMPIRBuilder but we
would preferably get rid of such state instead.

Bug reported by Anchu Rajendran Sudhakumari.

Depends on D82470.

Reviewed By: anchu-rajendran

Differential Revision: https://reviews.llvm.org/D82822

Added: 
    clang/test/OpenMP/irbuilder_nested_parallel_for.c

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/cancel_codegen.cpp
    clang/test/OpenMP/task_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index dc12286c72be..60c7081b135b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1455,6 +1455,19 @@ void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
   }
 }
 
+static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
+                                                  SourceLocation Loc,
+                                                  SmallString<128> &Buffer) {
+  llvm::raw_svector_ostream OS(Buffer);
+  // Build debug location
+  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
+  OS << ";" << PLoc.getFilename() << ";";
+  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
+    OS << FD->getQualifiedNameAsString();
+  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
+  return OS.str();
+}
+
 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  unsigned Flags) {
@@ -1464,6 +1477,16 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
       Loc.isInvalid())
     return getOrCreateDefaultLocation(Flags).getPointer();
 
+  // If the OpenMPIRBuilder is used we need to use it for all location handling
+  // as the clang invariants used below might be broken.
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    SmallString<128> Buffer;
+    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
+    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
+    return OMPBuilder.getOrCreateIdent(SrcLocStr, IdentFlag(Flags));
+  }
+
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
 
   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
@@ -1497,15 +1520,9 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
 
   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
   if (OMPDebugLoc == nullptr) {
-    SmallString<128> Buffer2;
-    llvm::raw_svector_ostream OS2(Buffer2);
-    // Build debug location
-    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-    OS2 << ";" << PLoc.getFilename() << ";";
-    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
-      OS2 << FD->getQualifiedNameAsString();
-    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
-    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
+    SmallString<128> Buffer;
+    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
   }
   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
@@ -1519,6 +1536,16 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
+  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
+  // the clang invariants used below might be broken.
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    SmallString<128> Buffer;
+    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
+    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
+    return OMPBuilder.getOrCreateThreadID(
+        OMPBuilder.getOrCreateIdent(SrcLocStr));
+  }
 
   llvm::Value *ThreadID = nullptr;
   // Check whether we've already cached a load of the thread id in this

diff  --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index a21a9db1e39a..0942c7cf4236 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -16,7 +16,6 @@
 
 float flag;
 int main (int argc, char **argv) {
-// ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
 #pragma omp parallel
 {
 #pragma omp cancel parallel if(flag)
@@ -42,14 +41,14 @@ int main (int argc, char **argv) {
   }
 }
 // ALL: call void @__kmpc_for_static_init_4(
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // ALL: [[EXIT]]
 // ALL: br label
 // ALL: [[CONTINUE]]
 // ALL: br label
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // ALL: [[EXIT]]
@@ -66,7 +65,7 @@ for (int i = 0; i < argc; ++i) {
 // ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
 // ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
 // ALL: [[THEN]]
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
+// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 2)
 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // ALL: [[EXIT]]
@@ -148,7 +147,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK: br label
 // CHECK: [[CONTINUE]]
 // CHECK: br label
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
 // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
 // CHECK: [[EXIT]]

diff  --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
new file mode 100644
index 000000000000..929a92827689
--- /dev/null
+++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
@@ -0,0 +1,299 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-DEBUG %s
+
+// expected-no-diagnostics
+
+// TODO: Teach the update script to check new functions too.
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @_Z14parallel_for_0v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK:       omp_parallel:
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*))
+// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// CHECK:       omp.par.outlined.exit:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    ret void
+//
+// CHECK-DEBUG-LABEL: @_Z14parallel_for_0v(
+// CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK-DEBUG:       omp_parallel:
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// CHECK-DEBUG:       omp.par.outlined.exit:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG-NEXT:    ret void, !dbg !{{[0-9]*}}
+//
+void parallel_for_0(void) {
+#pragma omp parallel
+  {
+#pragma omp for
+    for (int i = 0; i < 100; ++i) {
+    }
+  }
+}
+
+// CHECK-LABEL: @_Z14parallel_for_1Pfid(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK:       omp_parallel:
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.1 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT19:%.*]]
+// CHECK:       omp.par.outlined.exit19:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    ret void
+//
+// CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid(
+// CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @12), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK-DEBUG:       omp_parallel:
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @12, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.1 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT19:%.*]]
+// CHECK-DEBUG:       omp.par.outlined.exit19:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG-NEXT:    ret void, !dbg !{{[0-9]*}}
+//
+void parallel_for_1(float *r, int a, double b) {
+#pragma omp parallel
+  {
+#pragma omp parallel
+    {
+#pragma omp for
+      for (int i = 0; i < 100; ++i) {
+        *r = a + b;
+      }
+    }
+  }
+}
+
+// CHECK-LABEL: @_Z14parallel_for_2Pfid(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[DOTOMP_IV212:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP213:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_LB214:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_UB215:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_STRIDE216:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_IS_LAST217:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I218:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK:       omp_parallel:
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT211:%.*]]
+// CHECK:       omp.par.outlined.exit211:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    store i32 0, i32* [[DOTOMP_LB214]], align 4
+// CHECK-NEXT:    store i32 99, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE216]], align 4
+// CHECK-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST217]], align 4
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM219:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @41)
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @2, i32 [[OMP_GLOBAL_THREAD_NUM219]], i32 34, i32* [[DOTOMP_IS_LAST217]], i32* [[DOTOMP_LB214]], i32* [[DOTOMP_UB215]], i32* [[DOTOMP_STRIDE216]], i32 1, i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    [[CMP220:%.*]] = icmp sgt i32 [[TMP0]], 99
+// CHECK-NEXT:    br i1 [[CMP220]], label [[COND_TRUE221:%.*]], label [[COND_FALSE222:%.*]]
+// CHECK:       cond.true221:
+// CHECK-NEXT:    br label [[COND_END223:%.*]]
+// CHECK:       cond.false222:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    br label [[COND_END223]]
+// CHECK:       cond.end223:
+// CHECK-NEXT:    [[COND224:%.*]] = phi i32 [ 99, [[COND_TRUE221]] ], [ [[TMP1]], [[COND_FALSE222]] ]
+// CHECK-NEXT:    store i32 [[COND224]], i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB214]], align 4
+// CHECK-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND225:%.*]]
+// CHECK:       omp.inner.for.cond225:
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4
+// CHECK-NEXT:    [[CMP226:%.*]] = icmp sle i32 [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    br i1 [[CMP226]], label [[OMP_INNER_FOR_BODY227:%.*]], label [[OMP_INNER_FOR_END236:%.*]]
+// CHECK:       omp.inner.for.body227:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    [[MUL228:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK-NEXT:    [[ADD229:%.*]] = add nsw i32 0, [[MUL228]]
+// CHECK-NEXT:    store i32 [[ADD229]], i32* [[I218]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-NEXT:    [[CONV230:%.*]] = sitofp i32 [[TMP6]] to double
+// CHECK-NEXT:    [[TMP7:%.*]] = load double, double* [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD231:%.*]] = fadd double [[CONV230]], [[TMP7]]
+// CHECK-NEXT:    [[CONV232:%.*]] = fptrunc double [[ADD231]] to float
+// CHECK-NEXT:    [[TMP8:%.*]] = load float*, float** [[R_ADDR]], align 8
+// CHECK-NEXT:    store float [[CONV232]], float* [[TMP8]], align 4
+// CHECK-NEXT:    br label [[OMP_BODY_CONTINUE233:%.*]]
+// CHECK:       omp.body.continue233:
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC234:%.*]]
+// CHECK:       omp.inner.for.inc234:
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    [[ADD235:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK-NEXT:    store i32 [[ADD235]], i32* [[DOTOMP_IV212]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND225]]
+// CHECK:       omp.inner.for.end236:
+// CHECK-NEXT:    br label [[OMP_LOOP_EXIT237:%.*]]
+// CHECK:       omp.loop.exit237:
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM238:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @43)
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @2, i32 [[OMP_GLOBAL_THREAD_NUM238]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM239:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @7, i32 [[OMP_GLOBAL_THREAD_NUM239]])
+// CHECK-NEXT:    ret void
+//
+// CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid(
+// CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
+// CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-DEBUG-NEXT:    [[DOTOMP_IV212:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[TMP213:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_LB214:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_UB215:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_STRIDE216:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[DOTOMP_IS_LAST217:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    [[I218:%.*]] = alloca i32, align 4
+// CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @25), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// CHECK-DEBUG:       omp_parallel:
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @25, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT211:%.*]]
+// CHECK-DEBUG:       omp.par.outlined.exit211:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV212]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB214]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[DOTOMP_LB214]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB215]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 99, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE216]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE216]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST217]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST217]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[I218]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM219:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @97)
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @96, i32 [[OMP_GLOBAL_THREAD_NUM219]], i32 34, i32* [[DOTOMP_IS_LAST217]], i32* [[DOTOMP_LB214]], i32* [[DOTOMP_UB215]], i32* [[DOTOMP_STRIDE216]], i32 1, i32 1), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CMP220:%.*]] = icmp sgt i32 [[TMP0]], 99, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br i1 [[CMP220]], label [[COND_TRUE221:%.*]], label [[COND_FALSE222:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       cond.true221:
+// CHECK-DEBUG-NEXT:    br label [[COND_END223:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       cond.false222:
+// CHECK-DEBUG-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[COND_END223]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       cond.end223:
+// CHECK-DEBUG-NEXT:    [[COND224:%.*]] = phi i32 [ 99, [[COND_TRUE221]] ], [ [[TMP1]], [[COND_FALSE222]] ], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[COND224]], i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB214]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_INNER_FOR_COND225:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.cond225:
+// CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CMP226:%.*]] = icmp sle i32 [[TMP3]], [[TMP4]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br i1 [[CMP226]], label [[OMP_INNER_FOR_BODY227:%.*]], label [[OMP_INNER_FOR_END236:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.body227:
+// CHECK-DEBUG-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[MUL228:%.*]] = mul nsw i32 [[TMP5]], 1, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[ADD229:%.*]] = add nsw i32 0, [[MUL228]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[ADD229]], i32* [[I218]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CONV230:%.*]] = sitofp i32 [[TMP6]] to double, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[ADD231:%.*]] = fadd double [[CONV230]], [[TMP7]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[CONV232:%.*]] = fptrunc double [[ADD231]] to float, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store float [[CONV232]], float* [[TMP8]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_BODY_CONTINUE233:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.body.continue233:
+// CHECK-DEBUG-NEXT:    br label [[OMP_INNER_FOR_INC234:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.inc234:
+// CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[ADD235:%.*]] = add nsw i32 [[TMP9]], 1, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    store i32 [[ADD235]], i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    br label [[OMP_INNER_FOR_COND225]], !dbg !{{[0-9]*}}, !llvm.loop !{{[0-9]*}}
+// CHECK-DEBUG:       omp.inner.for.end236:
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_EXIT237:%.*]], !dbg !{{[0-9]*}}
+// CHECK-DEBUG:       omp.loop.exit237:
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM238:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @100)
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @99, i32 [[OMP_GLOBAL_THREAD_NUM238]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM239:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @103), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @102, i32 [[OMP_GLOBAL_THREAD_NUM239]]), !dbg !{{[0-9]*}}
+// CHECK-DEBUG-NEXT:    ret void, !dbg !{{[0-9]*}}
+//
+void parallel_for_2(float *r, int a, double b) {
+#pragma omp parallel
+  {
+#pragma omp for
+    for (int i = 0; i < 100; ++i)
+      *r = a + b;
+#pragma omp parallel
+    {
+#pragma omp for
+      for (int i = 0; i < 100; ++i)
+        *r = a + b;
+#pragma omp parallel
+      {
+#pragma omp for
+        for (int i = 0; i < 100; ++i)
+          *r = a + b;
+      }
+#pragma omp for
+      for (int i = 0; i < 100; ++i)
+        *r = a + b;
+#pragma omp parallel
+      {
+#pragma omp for
+        for (int i = 0; i < 100; ++i)
+          *r = a + b;
+      }
+#pragma omp for
+      for (int i = 0; i < 100; ++i)
+        *r = a + b;
+    }
+#pragma omp for
+    for (int i = 0; i < 100; ++i)
+      *r = a + b;
+  }
+#pragma omp for
+  for (int i = 0; i < 100; ++i)
+    *r = a + b;
+}
+
+#endif

diff  --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp
index 2a561ce7744e..922623499d49 100644
--- a/clang/test/OpenMP/task_codegen.cpp
+++ b/clang/test/OpenMP/task_codegen.cpp
@@ -33,12 +33,11 @@ int main() {
   char b;
   S s[2];
   int arr[10][a];
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
 // CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
 // CHECK: store i8* [[B]], i8** [[B_REF]]
 // CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
 // CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]]
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 33, i64 40, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 33, i64 40, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
 // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
 // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8*
@@ -46,7 +45,7 @@ int main() {
 // CHECK: [[PRIORITY_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 4
 // CHECK: [[PRIORITY:%.+]] = bitcast %union{{.+}}* [[PRIORITY_REF_PTR]] to i32*
 // CHECK: store i32 {{.+}}, i32* [[PRIORITY]]
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task shared(a, b, s) priority(b)
   {
     a = 15;
@@ -55,7 +54,7 @@ int main() {
   }
 // CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS1]], [[STRUCT_SHAREDS1]]* [[CAPTURES:%.+]], i32 0, i32 0
 // CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]]
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 [[GTID]], i32 1, i64 40, i64 8,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 {{%.*}}, i32 1, i64 40, i64 8,
 // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
 // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS1]]* [[CAPTURES]] to i8*
@@ -101,20 +100,20 @@ int main() {
 // CHECK: [[T0:%.*]] = getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 1, i8* [[T0]]
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* [[DEP_BASE]] to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 4, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 4, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task shared(a, s) depend(in : a, b, s, arr[:])
   {
     a = 15;
     s[1].a = 10;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task untied
   {
 #pragma omp critical
     a = 1;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1,
 // CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i64 0, i64 0
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
 // CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64
@@ -146,12 +145,12 @@ int main() {
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 3, i8*
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task untied depend(out : s[0], arr[4:][b])
   {
     a = 1;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1,
 // CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i64 0, i64 0
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
 // CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64
@@ -183,12 +182,12 @@ int main() {
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 4, i8*
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task untied depend(mutexinoutset: s[0], arr[4:][b])
   {
     a = 1;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 40, i64 1,
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 3, i64 40, i64 1,
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
 // CHECK: store i64 ptrtoint (i32* @{{.+}} to i64), i64*
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
@@ -229,38 +228,38 @@ int main() {
 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
 // CHECK: store i8 3, i8*
 // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
-// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 3, i8* %{{[^,]+}}, i32 0, i8* null)
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 3, i8* %{{[^,]+}}, i32 0, i8* null)
 #pragma omp task final(true) depend(inout: a, s[1], arr[:a][3:])
   {
     a = 2;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 3, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task final(true)
   {
     a = 2;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+  // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
+  // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
   const bool flag = false;
 #pragma omp task final(flag)
   {
     a = 3;
   }
-// CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]]
-// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
-// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
-// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+  // CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]]
+  // CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0
+  // CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0
+  // CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1
+  // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 [[FLAGS]], i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*))
+  // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
   int c __attribute__((aligned(128)));
 #pragma omp task final(b) shared(c)
   {
     a = 4;
     c = 5;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
-// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
 #pragma omp task untied
   {
     S s1;


        


More information about the cfe-commits mailing list