[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)
CHANDRA GHALE via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 26 04:02:20 PST 2024
https://github.com/chandraghale updated https://github.com/llvm/llvm-project/pull/117196
>From d19f41d39237b3d4fd2923f037743ddd495d5c9f Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Thu, 21 Nov 2024 11:15:11 -0600
Subject: [PATCH 1/4] Initial Codegen changes for strict modifier with
grainsize/num_tasks of taskloop construct
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 28 ++
clang/lib/CodeGen/CGOpenMPRuntime.h | 1 +
clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +
.../taskloop_strictmodifier_codegen.cpp | 256 ++++++++++++++++++
.../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 +
5 files changed, 290 insertions(+)
create mode 100644 clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cc389974e04081..361550d2f102b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,6 +4666,33 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
}
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+ if( Data.HasModifier ){
+ llvm::Value *TaskArgs[] = {
+ UpLoc,
+ ThreadID,
+ Result.NewTask,
+ IfVal,
+ LBLVal.getPointer(CGF),
+ UBLVal.getPointer(CGF),
+ CGF.EmitLoadOfScalar(StLVal, Loc),
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+ Data.Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+ llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
+ Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+ TaskArgs);
+ } else {
llvm::Value *TaskArgs[] = {
UpLoc,
ThreadID,
@@ -4690,6 +4717,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_taskloop),
TaskArgs);
+ }
}
/// Emit reduction operation for each element of array (required for
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5e7715743afb58..56d502d92806eb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
bool HasNowaitClause = false;
+ bool HasModifier = false;
};
/// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..88c862d2975174 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,10 +7831,12 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
// grainsize clause
Data.Schedule.setInt(/*IntVal=*/false);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+ Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
} else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
// num_tasks clause
Data.Schedule.setInt(/*IntVal=*/true);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+ Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
}
auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
new file mode 100644
index 00000000000000..d84ff181f66156
--- /dev/null
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -0,0 +1,256 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @main
+int main(int argc, char **argv) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[DEFLOC:@.+]])
+// CHECK: call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]],
+// CHECK: call i32 @__kmpc_omp_task(ptr [[DEFLOC]], i32 [[GTID]],
+#pragma omp task
+ ;
+// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK: [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, ptr [[TASK1:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 9, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: call void @__kmpc_taskloop(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, ptr null)
+// CHECK: call void @__kmpc_end_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: br label {{%?}}[[EXIT]]
+// CHECK: [[EXIT]]
+#pragma omp master taskloop priority(argc)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK: [[THEN]]
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, ptr [[TASK2:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 9, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i32 1, ptr null)
+// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: br label {{%?}}[[EXIT]]
+// CHECK: [[EXIT]]
+#pragma omp master taskloop nogroup grainsize(strict:argc)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK: [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, ptr [[TASK3:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
+// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 %{{.+}}, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 [[IF_INT]], ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i32 1, ptr null)
+// CHECK: call void @__kmpc_end_taskgroup(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: br label {{%?}}[[EXIT]]
+// CHECK: [[EXIT]]
+ int i;
+#pragma omp master taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(strict: 4)
+ for (i = 0; i < argc; ++i)
+ for (int j = argc; j < argv[argc][argc]; ++j)
+ ;
+// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK: [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(
+// CHECK: call ptr @__kmpc_omp_task_alloc(ptr @{{.+}}, i32 %{{.+}}, i32 1, i64 80, i64 1, ptr [[TASK_CANCEL:@.+]])
+// CHECK: call void @__kmpc_taskloop(
+// CHECK: call void @__kmpc_end_taskgroup(
+// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(ptr [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT: br label {{%?}}[[EXIT]]
+// CHECK: [[EXIT]]
+#pragma omp master taskloop
+ for (int i = 0; i < 10; ++i) {
+#pragma omp cancel taskgroup
+#pragma omp cancellation point taskgroup
+ }
+}
+
+// CHECK: define internal noundef i32 [[TASK1]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, ptr %
+// CHECK: store i32 %
+// CHECK: load i32, ptr %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, ptr %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal noundef i32 [[TASK2]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, ptr %
+// CHECK: store i32 %
+// CHECK: load i32, ptr %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, ptr %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+// CHECK: define internal noundef i32 [[TASK3]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: store i64 [[LB_VAL]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: ret i32 0
+
+// CHECK: define internal noundef i32 [[TASK_CANCEL]](
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(ptr @{{.+}}, i32 %{{.+}}, i32 4)
+// CHECK: [[IS_CANCEL:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[IS_CANCEL]], label %[[EXIT:.+]], label %[[CONTINUE:[^,]+]]
+// CHECK: [[EXIT]]:
+// CHECK: store i32 1, ptr [[CLEANUP_SLOT:%.+]],
+// CHECK: br label %[[DONE:[^,]+]]
+// CHECK: [[CONTINUE]]:
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(ptr @{{.+}}, i32 %{{.+}}, i32 4)
+// CHECK: [[IS_CANCEL:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[IS_CANCEL]], label %[[EXIT2:.+]], label %[[CONTINUE2:[^,]+]]
+// CHECK: [[EXIT2]]:
+// CHECK: store i32 1, ptr [[CLEANUP_SLOT]],
+// CHECK: br label %[[DONE]]
+// CHECK: store i32 0, ptr [[CLEANUP_SLOT]],
+// CHECK: br label %[[DONE]]
+// CHECK: [[DONE]]:
+// CHECK: ret i32 0
+
+// CHECK-LABEL: @_ZN1SC2Ei
+struct S {
+ int a;
+ S(int c) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[DEFLOC:@.+]])
+// CHECK: [[TASKV:%.+]] = call ptr @__kmpc_omp_task_alloc(ptr [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, ptr [[TASK4:@.+]])
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds nuw %{{.+}}, ptr [[TASKV]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 %{{.+}}, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, ptr [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop_5(ptr [[DEFLOC]], i32 [[GTID]], ptr [[TASKV]], i32 1, ptr [[DOWN]], ptr [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 [[NUM_TASKS]], i32 1, ptr null)
+#pragma omp master taskloop shared(c) num_tasks(strict:a)
+ for (a = 0; a < c; ++a)
+ ;
+ }
+} s(1);
+
+// CHECK: define internal noundef i32 [[TASK4]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds nuw [[TD_TY:%.+]], ptr %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, ptr [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, ptr [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, ptr [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds nuw [[TD_TY]], ptr %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, ptr [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], ptr [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], ptr [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], ptr [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], ptr [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, ptr [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], ptr [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, ptr [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, ptr [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, ptr %
+// CHECK: store i32 %
+// CHECK: load i32, ptr %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, ptr %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+#endif
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 6f26f853eca032..928a03148c4165 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -365,6 +365,9 @@ __OMP_RTL(__kmpc_omp_task_with_deps, false, Int32, IdentPtr, Int32,
__OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr,
/* Int */ Int32, Int64Ptr, Int64Ptr, Int64, /* Int */ Int32,
/* Int */ Int32, Int64, VoidPtr)
+__OMP_RTL(__kmpc_taskloop_5, false, Void, IdentPtr, /* Int */ Int32, VoidPtr,
+ /* Int */ Int32, Int64Ptr, Int64Ptr, Int64, /* Int */ Int32,
+ /* Int */ Int32, Int64, Int32, VoidPtr)
__OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr,
IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64)
__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr,
>From 01db551ef4ec1ef6f87e14838404c4b8f6838b11 Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Thu, 21 Nov 2024 12:41:12 -0600
Subject: [PATCH 2/4] clang-format changes
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 100 +++++++++++++-------------
clang/lib/CodeGen/CGStmtOpenMP.cpp | 6 +-
2 files changed, 54 insertions(+), 52 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 361550d2f102b4..50080ff9fa6b14 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,57 +4666,57 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
}
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
- if( Data.HasModifier ){
+ if (Data.HasModifier) {
llvm::Value *TaskArgs[] = {
- UpLoc,
- ThreadID,
- Result.NewTask,
- IfVal,
- LBLVal.getPointer(CGF),
- UBLVal.getPointer(CGF),
- CGF.EmitLoadOfScalar(StLVal, Loc),
- llvm::ConstantInt::getSigned(
- CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
- llvm::ConstantInt::getSigned(
- CGF.IntTy, Data.Schedule.getPointer()
- ? Data.Schedule.getInt() ? NumTasks : Grainsize
- : NoSchedule),
- Data.Schedule.getPointer()
- ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
- /*isSigned=*/false)
- : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
- llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
- Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Result.TaskDupFn, CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskloop_5),
- TaskArgs);
- } else {
- llvm::Value *TaskArgs[] = {
- UpLoc,
- ThreadID,
- Result.NewTask,
- IfVal,
- LBLVal.getPointer(CGF),
- UBLVal.getPointer(CGF),
- CGF.EmitLoadOfScalar(StLVal, Loc),
- llvm::ConstantInt::getSigned(
- CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
- llvm::ConstantInt::getSigned(
- CGF.IntTy, Data.Schedule.getPointer()
- ? Data.Schedule.getInt() ? NumTasks : Grainsize
- : NoSchedule),
- Data.Schedule.getPointer()
- ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
- /*isSigned=*/false)
- : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
- Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Result.TaskDupFn, CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskloop),
- TaskArgs);
+ UpLoc,
+ ThreadID,
+ Result.NewTask,
+ IfVal,
+ LBLVal.getPointer(CGF),
+ UBLVal.getPointer(CGF),
+ CGF.EmitLoadOfScalar(StLVal, Loc),
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+ Data.Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+ llvm::ConstantInt::get(CGF.Int32Ty, 1), // strict modifier enabled
+ Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+ TaskArgs);
+ } else {
+ llvm::Value *TaskArgs[] = {
+ UpLoc,
+ ThreadID,
+ Result.NewTask,
+ IfVal,
+ LBLVal.getPointer(CGF),
+ UBLVal.getPointer(CGF),
+ CGF.EmitLoadOfScalar(StLVal, Loc),
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+ Data.Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+ Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_taskloop),
+ TaskArgs);
}
}
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 88c862d2975174..6cb37b20b7aeee 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,12 +7831,14 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
// grainsize clause
Data.Schedule.setInt(/*IntVal=*/false);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
- Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
+ Data.HasModifier =
+ (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
} else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
// num_tasks clause
Data.Schedule.setInt(/*IntVal=*/true);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
- Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
+ Data.HasModifier =
+ (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
}
auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
>From 315287d8c984650e61fe99c3dd797919dcfa9d33 Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Fri, 22 Nov 2024 06:38:34 -0600
Subject: [PATCH 3/4] using smallvector to reduce redundant code
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 71 ++++++++++-----------------
1 file changed, 26 insertions(+), 45 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 50080ff9fa6b14..30b84656cfad6e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,58 +4666,39 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
}
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
- if (Data.HasModifier) {
- llvm::Value *TaskArgs[] = {
- UpLoc,
- ThreadID,
- Result.NewTask,
- IfVal,
- LBLVal.getPointer(CGF),
- UBLVal.getPointer(CGF),
- CGF.EmitLoadOfScalar(StLVal, Loc),
- llvm::ConstantInt::getSigned(
- CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
- llvm::ConstantInt::getSigned(
- CGF.IntTy, Data.Schedule.getPointer()
- ? Data.Schedule.getInt() ? NumTasks : Grainsize
- : NoSchedule),
- Data.Schedule.getPointer()
- ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
- /*isSigned=*/false)
- : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
- llvm::ConstantInt::get(CGF.Int32Ty, 1), // strict modifier enabled
- Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ llvm::SmallVector<llvm::Value *, 12> TaskArgs{
+ UpLoc,
+ ThreadID,
+ Result.NewTask,
+ IfVal,
+ LBLVal.getPointer(CGF),
+ UBLVal.getPointer(CGF),
+ CGF.EmitLoadOfScalar(StLVal, Loc),
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+ Data.Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
+ if (Data.HasModifier)
+ TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
+
+ TaskArgs.push_back(Result.TaskDupFn
+ ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Result.TaskDupFn, CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
+ if (Data.HasModifier)
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_taskloop_5),
TaskArgs);
- } else {
- llvm::Value *TaskArgs[] = {
- UpLoc,
- ThreadID,
- Result.NewTask,
- IfVal,
- LBLVal.getPointer(CGF),
- UBLVal.getPointer(CGF),
- CGF.EmitLoadOfScalar(StLVal, Loc),
- llvm::ConstantInt::getSigned(
- CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
- llvm::ConstantInt::getSigned(
- CGF.IntTy, Data.Schedule.getPointer()
- ? Data.Schedule.getInt() ? NumTasks : Grainsize
- : NoSchedule),
- Data.Schedule.getPointer()
- ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
- /*isSigned=*/false)
- : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
- Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Result.TaskDupFn, CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ else
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_taskloop),
TaskArgs);
- }
}
/// Emit reduction operation for each element of array (required for
>From 35062a081629d83435f4b08998b282349aefce1d Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe31.hpc.amslabs.hpecorp.net>
Date: Tue, 26 Nov 2024 06:01:42 -0600
Subject: [PATCH 4/4] Fix for review comment
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 30b84656cfad6e..6a5860242035b2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4691,14 +4691,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Result.TaskDupFn, CGF.VoidPtrTy)
: llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
- if (Data.HasModifier)
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskloop_5),
- TaskArgs);
- else
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_taskloop),
- TaskArgs);
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), Data.HasModifier
+ ? OMPRTL___kmpc_taskloop_5
+ : OMPRTL___kmpc_taskloop),
+ TaskArgs);
}
/// Emit reduction operation for each element of array (required for
More information about the cfe-commits
mailing list