[llvm-branch-commits] [mlir] [mlir][OpenMP] Don't allocate task context structure if not needed (PR #174588)
Tom Eccles via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 7 04:04:32 PST 2026
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/174588
>From 0e94907969d784a5783f026474be5e4a144b44b3 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 6 Jan 2026 13:29:30 +0000
Subject: [PATCH] [mlir][OpenMP] Don't allocate task context structure if not
needed
Don't allocate a task context structure if none of the private variables
need it. Previously, the allocation was skipped only when there were no
private variables at all.
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 11 +++--
.../LLVMIR/openmp-task-no-context-struct.mlir | 48 +++++++++++++++++++
.../openmp-taskloop-no-context-struct.mlir | 46 ++++++------------
3 files changed, 70 insertions(+), 35 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cd210757261bf..43c503757ddf5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2278,6 +2278,9 @@ void TaskContextStructManager::generateTaskContextStruct() {
privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
}
+ if (privateVarTypes.empty())
+ return;
+
structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
privateVarTypes);
@@ -2315,10 +2318,10 @@ SmallVector<llvm::Value *> TaskContextStructManager::createGEPsToPrivateVars(
}
void TaskContextStructManager::createGEPsToPrivateVars() {
- if (!structPtr) {
+ if (!structPtr)
assert(privateVarTypes.empty());
- return;
- }
+ // Still need to run createGEPsToPrivateVars to populate llvmPrivateVarGEPs
+ // with null values for skipped private decls
llvmPrivateVarGEPs = createGEPsToPrivateVars(structPtr);
}
@@ -2767,7 +2770,7 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
}
llvm::OpenMPIRBuilder::TaskDupCallbackTy taskDupOrNull = nullptr;
- if (!taskStructMgr.getLLVMPrivateVarGEPs().empty())
+ if (taskStructMgr.getStructPtr())
taskDupOrNull = taskDupCB;
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
diff --git a/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir
new file mode 100644
index 0000000000000..32ccac8296696
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir
@@ -0,0 +1,48 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Check that we don't allocate a task context structure when none of the private
+// vars need it.
+
+omp.private {type = private} @_QFtestEp_private_i32 : i32
+llvm.func @_QPtest() {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "p"} : (i64) -> !llvm.ptr
+ omp.task private(@_QFtestEp_private_i32 %1 -> %arg0 : !llvm.ptr) {
+ llvm.call @_QPdo_something(%arg0) {fastmathFlags = #llvm.fastmath<contract>} : (!llvm.ptr) -> ()
+ omp.terminator
+ }
+ llvm.return
+}
+llvm.func @_QPdo_something(!llvm.ptr) attributes {sym_visibility = "private"}
+
+// CHECK-LABEL: define void @_QPtest()
+// CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4
+// CHECK: br label %[[VAL_1:.*]]
+// CHECK: entry: ; preds = %[[VAL_2:.*]]
+// CHECK: br label %[[VAL_3:.*]]
+// CHECK: omp.private.init: ; preds = %[[VAL_1]]
+// CHECK-NOT: @malloc
+// CHECK: br label %[[VAL_4:.*]]
+// CHECK: omp.private.copy: ; preds = %[[VAL_3]]
+// CHECK: br label %[[VAL_5:.*]]
+// CHECK: omp.task.start: ; preds = %[[VAL_4]]
+// CHECK: br label %[[VAL_6:.*]]
+// CHECK: codeRepl: ; preds = %[[VAL_5]]
+// CHECK: %[[VAL_7:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: %[[VAL_8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_7]], i32 1, i64 40, i64 0, ptr @_QPtest..omp_par)
+// CHECK: %[[VAL_9:.*]] = call i32 @__kmpc_omp_task(ptr @1, i32 %[[VAL_7]], ptr %[[VAL_8]])
+// CHECK: br label %[[VAL_10:.*]]
+// CHECK: task.exit: ; preds = %[[VAL_6]]
+// CHECK: ret void
+
+// CHECK-LABEL: define internal void @_QPtest..omp_par
+// CHECK: task.alloca:
+// CHECK: %[[VAL_11:.*]] = alloca i32, align 4
+// CHECK: br label %[[VAL_12:.*]]
+// CHECK: task.body: ; preds = %[[VAL_13:.*]]
+// CHECK: br label %[[VAL_14:.*]]
+// CHECK: omp.task.region: ; preds = %[[VAL_12]]
+// CHECK: call void @_QPdo_something(ptr %[[VAL_11]])
+// CHECK: br label %[[VAL_15:.*]]
+// CHECK: omp.region.cont: ; preds = %[[VAL_14]]
+// CHECK-NOT: @free
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index 43b50e7a3206c..69a2edb2a7c0f 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -22,7 +22,8 @@ llvm.func @_QPtest() {
llvm.return
}
// CHECK-LABEL: define void @_QPtest() {
-// CHECK: %[[STRUCTARG:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// No task context structure:
+// CHECK: %[[STRUCTARG:.*]] = alloca { i64, i64, i64 }, align 8
// CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[VAL_2:.*]] = alloca i32, i64 1, align 4
@@ -30,31 +31,28 @@ llvm.func @_QPtest() {
// CHECK: entry: ; preds = %[[VAL_4:.*]]
// CHECK: br label %[[VAL_5:.*]]
// CHECK: omp.private.init: ; preds = %[[VAL_3]]
-// CHECK: %[[VAL_6:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({}, ptr null, i32 1) to i64))
// CHECK: br label %[[VAL_7:.*]]
// CHECK: omp.private.copy: ; preds = %[[VAL_5]]
// CHECK: br label %[[VAL_8:.*]]
// CHECK: omp.taskloop.start: ; preds = %[[VAL_7]]
// CHECK: br label %[[VAL_9:.*]]
// CHECK: codeRepl: ; preds = %[[VAL_8]]
-// CHECK: %[[VAL_10:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0
+// CHECK: %[[VAL_10:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 0
// CHECK: store i64 1, ptr %[[VAL_10]], align 4
-// CHECK: %[[VAL_11:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1
+// CHECK: %[[VAL_11:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 1
// CHECK: store i64 20, ptr %[[VAL_11]], align 4
-// CHECK: %[[VAL_12:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2
+// CHECK: %[[VAL_12:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 2
// CHECK: store i64 1, ptr %[[VAL_12]], align 4
-// CHECK: %[[VAL_13:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 3
-// CHECK: store ptr %[[VAL_6]], ptr %[[VAL_13]], align 8
// CHECK: %[[VAL_14:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
// CHECK: call void @__kmpc_taskgroup(ptr @1, i32 %[[VAL_14]])
-// CHECK: %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK: %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 24, ptr @_QPtest..omp_par)
// CHECK: %[[VAL_16:.*]] = load ptr, ptr %[[VAL_15]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], ptr align 1 %[[STRUCTARG]], i64 32, i1 false)
-// CHECK: %[[VAL_17:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 0
-// CHECK: %[[VAL_18:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 1
-// CHECK: %[[VAL_19:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 2
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], ptr align 1 %[[STRUCTARG]], i64 24, i1 false)
+// CHECK: %[[VAL_17:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 0
+// CHECK: %[[VAL_18:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 1
+// CHECK: %[[VAL_19:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 2
// CHECK: %[[VAL_20:.*]] = load i64, ptr %[[VAL_19]], align 4
-// CHECK: call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK: call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, i32 0, i64 0, ptr null)
// CHECK: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_14]])
// CHECK: br label %[[VAL_21:.*]]
// CHECK: taskloop.exit: ; preds = %[[VAL_9]]
@@ -63,14 +61,12 @@ llvm.func @_QPtest() {
// CHECK-LABEL: define internal void @_QPtest..omp_par
// CHECK: taskloop.alloca:
// CHECK: %[[VAL_22:.*]] = load ptr, ptr %[[VAL_23:.*]], align 8
-// CHECK: %[[VAL_24:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 0
+// CHECK: %[[VAL_24:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 0
// CHECK: %[[VAL_25:.*]] = load i64, ptr %[[VAL_24]], align 4
-// CHECK: %[[VAL_26:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 1
+// CHECK: %[[VAL_26:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 1
// CHECK: %[[VAL_27:.*]] = load i64, ptr %[[VAL_26]], align 4
-// CHECK: %[[VAL_28:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 2
+// CHECK: %[[VAL_28:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 2
// CHECK: %[[VAL_29:.*]] = load i64, ptr %[[VAL_28]], align 4
-// CHECK: %[[VAL_30:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 3
-// CHECK: %[[VAL_31:.*]] = load ptr, ptr %[[VAL_30]], align 8, !align !1
// CHECK: %[[VAL_32:.*]] = alloca i32, align 4
// CHECK: %[[VAL_33:.*]] = alloca i32, align 4
// CHECK: %[[VAL_34:.*]] = alloca i32, align 4
@@ -97,7 +93,6 @@ llvm.func @_QPtest() {
// CHECK: omp_loop.after: ; preds = %[[VAL_51]]
// CHECK: br label %[[VAL_53:.*]]
// CHECK: omp.region.cont: ; preds = %[[VAL_52]]
-// CHECK: tail call void @free(ptr %[[VAL_31]])
// CHECK: br label %[[VAL_54:.*]]
// CHECK: omp_loop.body: ; preds = %[[VAL_48]]
// CHECK: %[[VAL_55:.*]] = mul i32 %[[VAL_46]], 1
@@ -114,15 +109,4 @@ llvm.func @_QPtest() {
// CHECK: taskloop.exit.exitStub: ; preds = %[[VAL_53]]
// CHECK: ret void
-// CHECK-LABEL: define internal void @omp_taskloop_dup(
-// CHECK: entry:
-// CHECK: %[[VAL_59:.*]] = getelementptr { %[[VAL_60:.*]], { i64, i64, i64, ptr } }, ptr %[[VAL_61:.*]], i32 0, i32 1
-// CHECK: %[[VAL_62:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_59]], i32 0, i32 3
-// CHECK: %[[VAL_63:.*]] = getelementptr { %[[VAL_60]], { i64, i64, i64, ptr } }, ptr %[[VAL_64:.*]], i32 0, i32 1
-// CHECK: %[[VAL_65:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_63]], i32 0, i32 3
-// CHECK: %[[VAL_66:.*]] = load ptr, ptr %[[VAL_65]], align 8
-// TODO: don't generate allocation for empty task context struct (for later patch)
-// CHECK: %[[VAL_67:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({}, ptr null, i32 1) to i64))
-// CHECK: store ptr %[[VAL_67]], ptr %[[VAL_62]], align 8
-// CHECK: ret void
-
+// CHECK-NOT: define internal void @omp_taskloop_dup
More information about the llvm-branch-commits
mailing list