[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

CHANDRA GHALE via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 27 03:07:12 PST 2024


https://github.com/chandraghale updated https://github.com/llvm/llvm-project/pull/117196

>From d19f41d39237b3d4fd2923f037743ddd495d5c9f Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Thu, 21 Nov 2024 11:15:11 -0600
Subject: [PATCH 1/5] Initial Codegen changes for strict modifier with
 grainsize/num_tasks of taskloop construct

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  28 ++
 clang/lib/CodeGen/CGOpenMPRuntime.h           |   1 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   2 +
 .../taskloop_strictmodifier_codegen.cpp       | 256 ++++++++++++++++++
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |   3 +
 5 files changed, 290 insertions(+)
 create mode 100644 clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cc389974e04081..361550d2f102b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,6 +4666,33 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+  if( Data.HasModifier ){
+    llvm::Value *TaskArgs[] = {
+      UpLoc,
+      ThreadID,
+      Result.NewTask,
+      IfVal,
+      LBLVal.getPointer(CGF),
+      UBLVal.getPointer(CGF),
+      CGF.EmitLoadOfScalar(StLVal, Loc),
+      llvm::ConstantInt::getSigned(
+          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+      llvm::ConstantInt::getSigned(
+          CGF.IntTy, Data.Schedule.getPointer()
+                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
+                         : NoSchedule),
+      Data.Schedule.getPointer()
+          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+                                      /*isSigned=*/false)
+          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+      llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
+      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                             Result.TaskDupFn, CGF.VoidPtrTy)
+                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+                      TaskArgs);
+   } else {
   llvm::Value *TaskArgs[] = {
       UpLoc,
       ThreadID,
@@ -4690,6 +4717,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                           CGM.getModule(), OMPRTL___kmpc_taskloop),
                       TaskArgs);
+  }
 }
 
 /// Emit reduction operation for each element of array (required for
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5e7715743afb58..56d502d92806eb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  bool HasModifier = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..88c862d2975174 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,10 +7831,12 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
     // grainsize clause
     Data.Schedule.setInt(/*IntVal=*/false);
     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+    Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
     // num_tasks clause
     Data.Schedule.setInt(/*IntVal=*/true);
     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+    Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
   }
 
   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
new file mode 100644
index 00000000000000..d84ff181f66156
--- /dev/null
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -0,0 +1,256 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @main
+int main(int argc, char **argv) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[DEFLOC:@.+]])
+// CHECK: call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]],
+// CHECK: call i32 @__kmpc_omp_task(ptr [[DEFLOC]], i32 [[GTID]],
+#pragma omp task
+  ;
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, ptr [[TASK1:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 9, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: call void @__kmpc_taskloop(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, ptr null)
+// CHECK: call void @__kmpc_end_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+#pragma omp master taskloop priority(argc)
+  for (int i = 0; i < 10; ++i)
+    ;
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, ptr [[TASK2:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 9, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i32 1, ptr null)
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+#pragma omp master taskloop nogroup grainsize(strict:argc)
+  for (int i = 0; i < 10; ++i)
+    ;
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, ptr [[TASK3:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
+// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 %{{.+}}, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 [[IF_INT]], ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i32 1, ptr null)
+// CHECK: call void @__kmpc_end_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+  int i;
+#pragma omp master taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(strict: 4)
+  for (i = 0; i < argc; ++i)
+  for (int j = argc; j < argv[argc][argc]; ++j)
+    ;
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(
+// CHECK: call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %{{.+}}, i32 1, i64 80, i64 1, ptr [[TASK_CANCEL:@.+]])
+// CHECK: call void @__kmpc_taskloop(
+// CHECK: call void @__kmpc_end_taskgroup(
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+#pragma omp master taskloop
+  for (int i = 0; i < 10; ++i) {
+#pragma omp cancel taskgroup
+#pragma omp cancellation point taskgroup
+  }
+}
+
+// CHECK: define internal noundef i32 [[TASK1]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, ptr %
+// CHECK: store i32 %
+// CHECK: load i32, ptr %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, ptr %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal noundef i32 [[TASK2]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, ptr %
+// CHECK: store i32 %
+// CHECK: load i32, ptr %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, ptr %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal noundef i32 [[TASK3]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: store i64 [[LB_VAL]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: ret i32 0
+
+// CHECK: define internal noundef i32 [[TASK_CANCEL]](
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(ptr @{{.+}}, i32 %{{.+}}, i32 4)
+// CHECK: [[IS_CANCEL:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[IS_CANCEL]], label %[[EXIT:.+]], label %[[CONTINUE:[^,]+]]
+// CHECK: [[EXIT]]:
+// CHECK: store i32 1, ptr [[CLEANUP_SLOT:%.+]],
+// CHECK: br label %[[DONE:[^,]+]]
+// CHECK: [[CONTINUE]]:
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(ptr @{{.+}}, i32 %{{.+}}, i32 4)
+// CHECK: [[IS_CANCEL:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[IS_CANCEL]], label %[[EXIT2:.+]], label %[[CONTINUE2:[^,]+]]
+// CHECK: [[EXIT2]]:
+// CHECK: store i32 1, ptr [[CLEANUP_SLOT]],
+// CHECK: br label %[[DONE]]
+// CHECK: store i32 0, ptr [[CLEANUP_SLOT]],
+// CHECK: br label %[[DONE]]
+// CHECK: [[DONE]]:
+// CHECK: ret i32 0
+
+// CHECK-LABEL: @_ZN1SC2Ei
+struct S {
+  int a;
+  S(int c) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, ptr [[TASK4:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 %{{.+}}, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 [[NUM_TASKS]], i32 1, ptr null)
+#pragma omp master taskloop shared(c) num_tasks(strict:a)
+    for (a = 0; a < c; ++a)
+      ;
+  }
+} s(1);
+
+// CHECK: define internal noundef i32 [[TASK4]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, ptr %
+// CHECK: store i32 %
+// CHECK: load i32, ptr %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, ptr %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+#endif
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 6f26f853eca032..928a03148c4165 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -365,6 +365,9 @@ __OMP_RTL(__kmpc_omp_task_with_deps, false, Int32, IdentPtr, Int32,
 __OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr,
           /* Int */ Int32, Int64Ptr, Int64Ptr, Int64, /* Int */ Int32,
           /* Int */ Int32, Int64, VoidPtr)
+__OMP_RTL(__kmpc_taskloop_5, false, Void, IdentPtr, /* Int */ Int32, VoidPtr,
+          /* Int */ Int32, Int64Ptr, Int64Ptr, Int64, /* Int */ Int32,
+          /* Int */ Int32, Int64, Int32, VoidPtr)
 __OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr,
           IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64)
 __OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr,

>From 01db551ef4ec1ef6f87e14838404c4b8f6838b11 Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Thu, 21 Nov 2024 12:41:12 -0600
Subject: [PATCH 2/5] clang-format changes

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 100 +++++++++++++-------------
 clang/lib/CodeGen/CGStmtOpenMP.cpp    |   6 +-
 2 files changed, 54 insertions(+), 52 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 361550d2f102b4..50080ff9fa6b14 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,57 +4666,57 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
-  if( Data.HasModifier ){
+  if (Data.HasModifier) {
     llvm::Value *TaskArgs[] = {
-      UpLoc,
-      ThreadID,
-      Result.NewTask,
-      IfVal,
-      LBLVal.getPointer(CGF),
-      UBLVal.getPointer(CGF),
-      CGF.EmitLoadOfScalar(StLVal, Loc),
-      llvm::ConstantInt::getSigned(
-          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
-      llvm::ConstantInt::getSigned(
-          CGF.IntTy, Data.Schedule.getPointer()
-                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
-                         : NoSchedule),
-      Data.Schedule.getPointer()
-          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
-                                      /*isSigned=*/false)
-          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
-      llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
-      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-                             Result.TaskDupFn, CGF.VoidPtrTy)
-                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_taskloop_5),
-                      TaskArgs);
-   } else {
-  llvm::Value *TaskArgs[] = {
-      UpLoc,
-      ThreadID,
-      Result.NewTask,
-      IfVal,
-      LBLVal.getPointer(CGF),
-      UBLVal.getPointer(CGF),
-      CGF.EmitLoadOfScalar(StLVal, Loc),
-      llvm::ConstantInt::getSigned(
-          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
-      llvm::ConstantInt::getSigned(
-          CGF.IntTy, Data.Schedule.getPointer()
-                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
-                         : NoSchedule),
-      Data.Schedule.getPointer()
-          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
-                                      /*isSigned=*/false)
-          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
-      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-                             Result.TaskDupFn, CGF.VoidPtrTy)
-                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_taskloop),
-                      TaskArgs);
+        UpLoc,
+        ThreadID,
+        Result.NewTask,
+        IfVal,
+        LBLVal.getPointer(CGF),
+        UBLVal.getPointer(CGF),
+        CGF.EmitLoadOfScalar(StLVal, Loc),
+        llvm::ConstantInt::getSigned(
+            CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+        llvm::ConstantInt::getSigned(
+            CGF.IntTy, Data.Schedule.getPointer()
+                           ? Data.Schedule.getInt() ? NumTasks : Grainsize
+                           : NoSchedule),
+        Data.Schedule.getPointer()
+            ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+                                        /*isSigned=*/false)
+            : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+        llvm::ConstantInt::get(CGF.Int32Ty, 1), // strict modifier enabled
+        Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                               Result.TaskDupFn, CGF.VoidPtrTy)
+                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                            CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+                        TaskArgs);
+  } else {
+    llvm::Value *TaskArgs[] = {
+        UpLoc,
+        ThreadID,
+        Result.NewTask,
+        IfVal,
+        LBLVal.getPointer(CGF),
+        UBLVal.getPointer(CGF),
+        CGF.EmitLoadOfScalar(StLVal, Loc),
+        llvm::ConstantInt::getSigned(
+            CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+        llvm::ConstantInt::getSigned(
+            CGF.IntTy, Data.Schedule.getPointer()
+                           ? Data.Schedule.getInt() ? NumTasks : Grainsize
+                           : NoSchedule),
+        Data.Schedule.getPointer()
+            ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+                                        /*isSigned=*/false)
+            : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+        Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                               Result.TaskDupFn, CGF.VoidPtrTy)
+                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                            CGM.getModule(), OMPRTL___kmpc_taskloop),
+                        TaskArgs);
   }
 }
 
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 88c862d2975174..6cb37b20b7aeee 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,12 +7831,14 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
     // grainsize clause
     Data.Schedule.setInt(/*IntVal=*/false);
     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
-    Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
+    Data.HasModifier =
+        (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
     // num_tasks clause
     Data.Schedule.setInt(/*IntVal=*/true);
     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
-    Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
+    Data.HasModifier =
+        (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
   }
 
   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {

>From 315287d8c984650e61fe99c3dd797919dcfa9d33 Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Fri, 22 Nov 2024 06:38:34 -0600
Subject: [PATCH 3/5] Use SmallVector for the task arguments to reduce redundant code

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 71 ++++++++++-----------------
 1 file changed, 26 insertions(+), 45 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 50080ff9fa6b14..30b84656cfad6e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,58 +4666,39 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
-  if (Data.HasModifier) {
-    llvm::Value *TaskArgs[] = {
-        UpLoc,
-        ThreadID,
-        Result.NewTask,
-        IfVal,
-        LBLVal.getPointer(CGF),
-        UBLVal.getPointer(CGF),
-        CGF.EmitLoadOfScalar(StLVal, Loc),
-        llvm::ConstantInt::getSigned(
-            CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
-        llvm::ConstantInt::getSigned(
-            CGF.IntTy, Data.Schedule.getPointer()
-                           ? Data.Schedule.getInt() ? NumTasks : Grainsize
-                           : NoSchedule),
-        Data.Schedule.getPointer()
-            ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
-                                        /*isSigned=*/false)
-            : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
-        llvm::ConstantInt::get(CGF.Int32Ty, 1), // strict modifier enabled
-        Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
+      UpLoc,
+      ThreadID,
+      Result.NewTask,
+      IfVal,
+      LBLVal.getPointer(CGF),
+      UBLVal.getPointer(CGF),
+      CGF.EmitLoadOfScalar(StLVal, Loc),
+      llvm::ConstantInt::getSigned(
+          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+      llvm::ConstantInt::getSigned(
+          CGF.IntTy, Data.Schedule.getPointer()
+                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
+                         : NoSchedule),
+      Data.Schedule.getPointer()
+          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+                                      /*isSigned=*/false)
+          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
+  if (Data.HasModifier)
+    TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
+
+  TaskArgs.push_back(Result.TaskDupFn
+                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                Result.TaskDupFn, CGF.VoidPtrTy)
-                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
+  if (Data.HasModifier)
     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                             CGM.getModule(), OMPRTL___kmpc_taskloop_5),
                         TaskArgs);
-  } else {
-    llvm::Value *TaskArgs[] = {
-        UpLoc,
-        ThreadID,
-        Result.NewTask,
-        IfVal,
-        LBLVal.getPointer(CGF),
-        UBLVal.getPointer(CGF),
-        CGF.EmitLoadOfScalar(StLVal, Loc),
-        llvm::ConstantInt::getSigned(
-            CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
-        llvm::ConstantInt::getSigned(
-            CGF.IntTy, Data.Schedule.getPointer()
-                           ? Data.Schedule.getInt() ? NumTasks : Grainsize
-                           : NoSchedule),
-        Data.Schedule.getPointer()
-            ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
-                                        /*isSigned=*/false)
-            : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
-        Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-                               Result.TaskDupFn, CGF.VoidPtrTy)
-                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  else
     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                             CGM.getModule(), OMPRTL___kmpc_taskloop),
                         TaskArgs);
-  }
 }
 
 /// Emit reduction operation for each element of array (required for

>From 35062a081629d83435f4b08998b282349aefce1d Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Tue, 26 Nov 2024 06:01:42 -0600
Subject: [PATCH 4/5] Address review comment: select the runtime function with a conditional expression

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 30b84656cfad6e..6a5860242035b2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4691,14 +4691,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                          ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                Result.TaskDupFn, CGF.VoidPtrTy)
                          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
-  if (Data.HasModifier)
-    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                            CGM.getModule(), OMPRTL___kmpc_taskloop_5),
-                        TaskArgs);
-  else
-    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                            CGM.getModule(), OMPRTL___kmpc_taskloop),
-                        TaskArgs);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), Data.HasModifier
+                                               ? OMPRTL___kmpc_taskloop_5
+                                               : OMPRTL___kmpc_taskloop),
+                      TaskArgs);
 }
 
 /// Emit reduction operation for each element of array (required for

>From 6bc0e3f2422ceb8564f6ac25e8589c57f26968df Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Wed, 27 Nov 2024 05:04:21 -0600
Subject: [PATCH 5/5] Update test with autogenerated check lines

---
 .../taskloop_strictmodifier_codegen.cpp       | 355 +++++++-----------
 1 file changed, 132 insertions(+), 223 deletions(-)

diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
index d84ff181f66156..107db947d015f3 100644
--- a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -1,256 +1,165 @@
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -fopenmp -O1 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
 
-// CHECK-LABEL: @main
 int main(int argc, char **argv) {
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[DEFLOC:@.+]])
-// CHECK: call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]],
-// CHECK: call i32 @__kmpc_omp_task(ptr [[DEFLOC]], i32 [[GTID]],
 #pragma omp task
-  ;
-// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
-// CHECK:       [[THEN]]
-// CHECK: call void @__kmpc_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, ptr [[TASK1:@.+]])
-// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
-// CHECK: store i64 0, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
-// CHECK: store i64 9, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
-// CHECK: store i64 1, ptr [[ST]],
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: call void @__kmpc_taskloop(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, ptr null)
-// CHECK: call void @__kmpc_end_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  br label {{%?}}[[EXIT]]
-// CHECK:       [[EXIT]]
 #pragma omp master taskloop priority(argc)
   for (int i = 0; i < 10; ++i)
     ;
-// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
-// CHECK:       [[THEN]]
-// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, ptr [[TASK2:@.+]])
-// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
-// CHECK: store i64 0, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
-// CHECK: store i64 9, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
-// CHECK: store i64 1, ptr [[ST]],
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
-// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i32 1, ptr null)
-// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  br label {{%?}}[[EXIT]]
-// CHECK:       [[EXIT]]
 #pragma omp master taskloop nogroup grainsize(strict:argc)
   for (int i = 0; i < 10; ++i)
     ;
-// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
-// CHECK:       [[THEN]]
-// CHECK: call void @__kmpc_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, ptr [[TASK3:@.+]])
-// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
-// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
-// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
-// CHECK: store i64 0, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
-// CHECK: store i64 %{{.+}}, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
-// CHECK: store i64 1, ptr [[ST]],
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 [[IF_INT]], ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i32 1, ptr null)
-// CHECK: call void @__kmpc_end_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  br label {{%?}}[[EXIT]]
-// CHECK:       [[EXIT]]
   int i;
 #pragma omp master taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(strict: 4)
   for (i = 0; i < argc; ++i)
   for (int j = argc; j < argv[argc][argc]; ++j)
     ;
-// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
-// CHECK:       [[THEN]]
-// CHECK: call void @__kmpc_taskgroup(
-// CHECK: call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %{{.+}}, i32 1, i64 80, i64 1, ptr [[TASK_CANCEL:@.+]])
-// CHECK: call void @__kmpc_taskloop(
-// CHECK: call void @__kmpc_end_taskgroup(
-// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
-// CHECK-NEXT:  br label {{%?}}[[EXIT]]
-// CHECK:       [[EXIT]]
 #pragma omp master taskloop
   for (int i = 0; i < 10; ++i) {
 #pragma omp cancel taskgroup
 #pragma omp cancellation point taskgroup
   }
 }
-
-// CHECK: define internal noundef i32 [[TASK1]](
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
-// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
-// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
-// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
-// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
-// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
-// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
-// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
-// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
-// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
-// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
-// CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
-// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
-// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
-// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, ptr %
-// CHECK: store i32 %
-// CHECK: load i32, ptr %
-// CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, ptr %
-// CHECK: br label %
-// CHECK: ret i32 0
-
-// CHECK: define internal noundef i32 [[TASK2]](
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
-// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
-// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
-// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
-// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
-// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
-// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
-// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
-// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
-// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
-// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
-// CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
-// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
-// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
-// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, ptr %
-// CHECK: store i32 %
-// CHECK: load i32, ptr %
-// CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, ptr %
-// CHECK: br label %
-// CHECK: ret i32 0
-
-// CHECK: define internal noundef i32 [[TASK3]](
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
-// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
-// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
-// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
-// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
-// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
-// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
-// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
-// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
-// CHECK: store i64 [[LB_VAL]], ptr [[CNT:%.+]],
-// CHECK: br label
-// CHECK: ret i32 0
-
-// CHECK: define internal noundef i32 [[TASK_CANCEL]](
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(ptr @{{.+}}, i32 %{{.+}}, i32 4)
-// CHECK: [[IS_CANCEL:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[IS_CANCEL]], label %[[EXIT:.+]], label %[[CONTINUE:[^,]+]]
-// CHECK: [[EXIT]]:
-// CHECK: store i32 1, ptr [[CLEANUP_SLOT:%.+]],
-// CHECK: br label %[[DONE:[^,]+]]
-// CHECK: [[CONTINUE]]:
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(ptr @{{.+}}, i32 %{{.+}}, i32 4)
-// CHECK: [[IS_CANCEL:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[IS_CANCEL]], label %[[EXIT2:.+]], label %[[CONTINUE2:[^,]+]]
-// CHECK: [[EXIT2]]:
-// CHECK: store i32 1, ptr [[CLEANUP_SLOT]],
-// CHECK: br label %[[DONE]]
-// CHECK: store i32 0, ptr [[CLEANUP_SLOT]],
-// CHECK: br label %[[DONE]]
-// CHECK: [[DONE]]:
-// CHECK: ret i32 0
-
-// CHECK-LABEL: @_ZN1SC2Ei
 struct S {
   int a;
   S(int c) {
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[DEFLOC:@.+]])
-// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, ptr [[TASK4:@.+]])
-// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
-// CHECK: store i64 0, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
-// CHECK: store i64 %{{.+}}, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
-// CHECK: store i64 1, ptr [[ST]],
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
-// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 [[NUM_TASKS]], i32 1, ptr null)
 #pragma omp master taskloop shared(c) num_tasks(strict:a)
     for (a = 0; a < c; ++a)
       ;
   }
 } s(1);
 
-// CHECK: define internal noundef i32 [[TASK4]](
-// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
-// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
-// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
-// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
-// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
-// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
-// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
-// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
-// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
-// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
-// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
-// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
-// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
-// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
-// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
-// CHECK: br label
-// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
-// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
-// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
-// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
-// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
-// CHECK: load i32, ptr %
-// CHECK: store i32 %
-// CHECK: load i32, ptr %
-// CHECK: add nsw i32 %{{.+}}, 1
-// CHECK: store i32 %{{.+}}, ptr %
-// CHECK: br label %
-// CHECK: ret i32 0
-
 #endif
+
+// CHECK-LABEL: define noundef i32 @main(
+// CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1:[0-9]+]])
+// CHECK-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr nonnull @.omp_task_entry..2)
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0
+// CHECK-NEXT:    br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]]
+// CHECK:       [[OMP_IF_THEN]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..4)
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 40
+// CHECK-NEXT:    store i64 0, ptr [[TMP8]], align 8, !tbaa [[TBAA13:![0-9]+]]
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 48
+// CHECK-NEXT:    store i64 9, ptr [[TMP9]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 56
+// CHECK-NEXT:    store i64 1, ptr [[TMP10]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 72
+// CHECK-NEXT:    store i64 0, ptr [[TMP11]], align 8
+// CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
+// CHECK-NEXT:    tail call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]], i32 1, ptr nonnull [[TMP8]], ptr nonnull [[TMP9]], i64 1, i32 1, i32 1, i64 [[TMP12]], i32 1, ptr null) #[[ATTR1:[0-9]+]]
+// CHECK-NEXT:    tail call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    br label %[[OMP_IF_END]]
+// CHECK:       [[OMP_IF_END]]:
+// CHECK-NEXT:    [[TMP13:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    [[DOTNOT22:%.*]] = icmp eq i32 [[TMP13]], 0
+// CHECK-NEXT:    br i1 [[DOTNOT22]], label %[[OMP_IF_END17:.*]], label %[[OMP_IF_THEN2:.*]]
+// CHECK:       [[OMP_IF_THEN2]]:
+// CHECK-NEXT:    tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP15]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[TBAA15:![0-9]+]]
+// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP17]] to i32
+// CHECK-NEXT:    [[SUB12:%.*]] = sub i32 [[CONV]], [[TMP14]]
+// CHECK-NEXT:    [[CONV15:%.*]] = zext i32 [[SUB12]] to i64
+// CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV15]], [[IDXPROM]]
+// CHECK-NEXT:    [[SUB16:%.*]] = add nsw i64 [[MUL]], -1
+// CHECK-NEXT:    [[TMP18:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..6)
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA16:![0-9]+]]
+// CHECK-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[AGG_CAPTURED3_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 8
+// CHECK-NEXT:    store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
+// CHECK-NEXT:    [[TMP21:%.*]] = sext i1 [[TOBOOL]] to i32
+// CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 40
+// CHECK-NEXT:    store i64 0, ptr [[TMP22]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 48
+// CHECK-NEXT:    store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 56
+// CHECK-NEXT:    store i64 1, ptr [[TMP24]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 72
+// CHECK-NEXT:    store i64 0, ptr [[TMP25]], align 8
+// CHECK-NEXT:    call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP18]], i32 [[TMP21]], ptr nonnull [[TMP22]], ptr nonnull [[TMP23]], i64 1, i32 1, i32 2, i64 4, i32 1, ptr null) #[[ATTR1]]
+// CHECK-NEXT:    call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    br label %[[OMP_IF_END17]]
+// CHECK:       [[OMP_IF_END17]]:
+// CHECK-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    [[DOTNOT23:%.*]] = icmp eq i32 [[TMP26]], 0
+// CHECK-NEXT:    br i1 [[DOTNOT23]], label %[[OMP_IF_END21:.*]], label %[[OMP_IF_THEN18:.*]]
+// CHECK:       [[OMP_IF_THEN18]]:
+// CHECK-NEXT:    call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP27:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..8)
+// CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 40
+// CHECK-NEXT:    store i64 0, ptr [[TMP28]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 48
+// CHECK-NEXT:    store i64 9, ptr [[TMP29]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 56
+// CHECK-NEXT:    store i64 1, ptr [[TMP30]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 72
+// CHECK-NEXT:    store i64 0, ptr [[TMP31]], align 8
+// CHECK-NEXT:    call void @__kmpc_taskloop(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP27]], i32 1, ptr nonnull [[TMP28]], ptr nonnull [[TMP29]], i64 1, i32 1, i32 0, i64 0, ptr null)
+// CHECK-NEXT:    call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    br label %[[OMP_IF_END21]]
+// CHECK:       [[OMP_IF_END21]]:
+// CHECK-NEXT:    ret i32 0
+//
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN1SC2Ei(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[C:%.*]]) unnamed_addr #[[ATTR6:[0-9]+]] align 2 {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]])
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
+// CHECK-NEXT:    br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]]
+// CHECK:       [[OMP_IF_THEN]]:
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa [[TBAA27:![0-9]+]]
+// CHECK-NEXT:    tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[SUB4:%.*]] = add nsw i32 [[TMP3]], -1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..10)
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA16]]
+// CHECK-NEXT:    store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[AGG_CAPTURED_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 8
+// CHECK-NEXT:    store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 40
+// CHECK-NEXT:    store i64 0, ptr [[TMP6]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 48
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[SUB4]] to i64
+// CHECK-NEXT:    store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 56
+// CHECK-NEXT:    store i64 1, ptr [[TMP8]], align 8, !tbaa [[TBAA13]]
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 72
+// CHECK-NEXT:    store i64 0, ptr [[TMP9]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP2]] to i64
+// CHECK-NEXT:    call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP4]], i32 1, ptr nonnull [[TMP6]], ptr nonnull [[TMP7]], i64 1, i32 1, i32 2, i64 [[TMP10]], i32 1, ptr null) #[[ATTR1]]
+// CHECK-NEXT:    call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:    br label %[[OMP_IF_END]]
+// CHECK:       [[OMP_IF_END]]:
+// CHECK-NEXT:    ret void
+



More information about the cfe-commits mailing list