[llvm-branch-commits] [mlir] [mlir][OpenMP] Don't allocate task context structure if not needed (PR #174588)

Tom Eccles via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 7 04:04:32 PST 2026


https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/174588

>From 0e94907969d784a5783f026474be5e4a144b44b3 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 6 Jan 2026 13:29:30 +0000
Subject: [PATCH] [mlir][OpenMP] Don't allocate task context structure if not
 needed

Don't allocate a task context structure if none of the private variables
need it. Previously the allocation was skipped only when there were no
private variables at all.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 11 +++--
 .../LLVMIR/openmp-task-no-context-struct.mlir | 48 +++++++++++++++++++
 .../openmp-taskloop-no-context-struct.mlir    | 46 ++++++------------
 3 files changed, 70 insertions(+), 35 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cd210757261bf..43c503757ddf5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2278,6 +2278,9 @@ void TaskContextStructManager::generateTaskContextStruct() {
     privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
   }
 
+  if (privateVarTypes.empty())
+    return;
+
   structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
                                    privateVarTypes);
 
@@ -2315,10 +2318,10 @@ SmallVector<llvm::Value *> TaskContextStructManager::createGEPsToPrivateVars(
 }
 
 void TaskContextStructManager::createGEPsToPrivateVars() {
-  if (!structPtr) {
+  if (!structPtr)
     assert(privateVarTypes.empty());
-    return;
-  }
+  // Still need to run createGEPsToPrivateVars to populate llvmPrivateVarGEPs
+  // with null values for skipped private decls
 
   llvmPrivateVarGEPs = createGEPsToPrivateVars(structPtr);
 }
@@ -2767,7 +2770,7 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
   }
 
   llvm::OpenMPIRBuilder::TaskDupCallbackTy taskDupOrNull = nullptr;
-  if (!taskStructMgr.getLLVMPrivateVarGEPs().empty())
+  if (taskStructMgr.getStructPtr())
     taskDupOrNull = taskDupCB;
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
diff --git a/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir
new file mode 100644
index 0000000000000..32ccac8296696
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir
@@ -0,0 +1,48 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Check that we don't allocate a task context structure when none of the private
+// vars need it.
+
+omp.private {type = private} @_QFtestEp_private_i32 : i32
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "p"} : (i64) -> !llvm.ptr
+  omp.task private(@_QFtestEp_private_i32 %1 -> %arg0 : !llvm.ptr) {
+    llvm.call @_QPdo_something(%arg0) {fastmathFlags = #llvm.fastmath<contract>} : (!llvm.ptr) -> ()
+    omp.terminator
+  }
+  llvm.return
+}
+llvm.func @_QPdo_something(!llvm.ptr) attributes {sym_visibility = "private"}
+
+// CHECK-LABEL: define void @_QPtest()
+// CHECK:         %[[VAL_0:.*]] = alloca i32, i64 1, align 4
+// CHECK:         br label %[[VAL_1:.*]]
+// CHECK:       entry:                                            ; preds = %[[VAL_2:.*]]
+// CHECK:         br label %[[VAL_3:.*]]
+// CHECK:       omp.private.init:                                 ; preds = %[[VAL_1]]
+// CHECK-NOT:     @malloc
+// CHECK:         br label %[[VAL_4:.*]]
+// CHECK:       omp.private.copy:                                 ; preds = %[[VAL_3]]
+// CHECK:         br label %[[VAL_5:.*]]
+// CHECK:       omp.task.start:                                   ; preds = %[[VAL_4]]
+// CHECK:         br label %[[VAL_6:.*]]
+// CHECK:       codeRepl:                                         ; preds = %[[VAL_5]]
+// CHECK:         %[[VAL_7:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:         %[[VAL_8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_7]], i32 1, i64 40, i64 0, ptr @_QPtest..omp_par)
+// CHECK:         %[[VAL_9:.*]] = call i32 @__kmpc_omp_task(ptr @1, i32 %[[VAL_7]], ptr %[[VAL_8]])
+// CHECK:         br label %[[VAL_10:.*]]
+// CHECK:       task.exit:                                        ; preds = %[[VAL_6]]
+// CHECK:         ret void
+
+// CHECK-LABEL: define internal void @_QPtest..omp_par
+// CHECK:       task.alloca:
+// CHECK:         %[[VAL_11:.*]] = alloca i32, align 4
+// CHECK:         br label %[[VAL_12:.*]]
+// CHECK:       task.body:                                        ; preds = %[[VAL_13:.*]]
+// CHECK:         br label %[[VAL_14:.*]]
+// CHECK:       omp.task.region:                                  ; preds = %[[VAL_12]]
+// CHECK:         call void @_QPdo_something(ptr %[[VAL_11]])
+// CHECK:         br label %[[VAL_15:.*]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[VAL_14]]
+// CHECK-NOT:     @free
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index 43b50e7a3206c..69a2edb2a7c0f 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -22,7 +22,8 @@ llvm.func @_QPtest() {
   llvm.return
 }
 // CHECK-LABEL: define void @_QPtest() {
-// CHECK:         %[[STRUCTARG:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// No task context structure:
+// CHECK:         %[[STRUCTARG:.*]] = alloca { i64, i64, i64 }, align 8
 // CHECK:         %[[VAL_0:.*]] = alloca i32, i64 1, align 4
 // CHECK:         %[[VAL_1:.*]] = alloca i32, i64 1, align 4
 // CHECK:         %[[VAL_2:.*]] = alloca i32, i64 1, align 4
@@ -30,31 +31,28 @@ llvm.func @_QPtest() {
 // CHECK:       entry:                                            ; preds = %[[VAL_4:.*]]
 // CHECK:         br label %[[VAL_5:.*]]
 // CHECK:       omp.private.init:                                 ; preds = %[[VAL_3]]
-// CHECK:         %[[VAL_6:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({}, ptr null, i32 1) to i64))
 // CHECK:         br label %[[VAL_7:.*]]
 // CHECK:       omp.private.copy:                                 ; preds = %[[VAL_5]]
 // CHECK:         br label %[[VAL_8:.*]]
 // CHECK:       omp.taskloop.start:                               ; preds = %[[VAL_7]]
 // CHECK:         br label %[[VAL_9:.*]]
 // CHECK:       codeRepl:                                         ; preds = %[[VAL_8]]
-// CHECK:         %[[VAL_10:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0
+// CHECK:         %[[VAL_10:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 0
 // CHECK:         store i64 1, ptr %[[VAL_10]], align 4
-// CHECK:         %[[VAL_11:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1
+// CHECK:         %[[VAL_11:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 1
 // CHECK:         store i64 20, ptr %[[VAL_11]], align 4
-// CHECK:         %[[VAL_12:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2
+// CHECK:         %[[VAL_12:.*]] = getelementptr { i64, i64, i64 }, ptr %[[STRUCTARG]], i32 0, i32 2
 // CHECK:         store i64 1, ptr %[[VAL_12]], align 4
-// CHECK:         %[[VAL_13:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[STRUCTARG]], i32 0, i32 3
-// CHECK:         store ptr %[[VAL_6]], ptr %[[VAL_13]], align 8
 // CHECK:         %[[VAL_14:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_taskgroup(ptr @1, i32 %[[VAL_14]])
-// CHECK:         %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:         %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 24, ptr @_QPtest..omp_par)
 // CHECK:         %[[VAL_16:.*]] = load ptr, ptr %[[VAL_15]], align 8
-// CHECK:         call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], ptr align 1 %[[STRUCTARG]], i64 32, i1 false)
-// CHECK:         %[[VAL_17:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 0
-// CHECK:         %[[VAL_18:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 1
-// CHECK:         %[[VAL_19:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 2
+// CHECK:         call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], ptr align 1 %[[STRUCTARG]], i64 24, i1 false)
+// CHECK:         %[[VAL_17:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 0
+// CHECK:         %[[VAL_18:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 1
+// CHECK:         %[[VAL_19:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_16]], i32 0, i32 2
 // CHECK:         %[[VAL_20:.*]] = load i64, ptr %[[VAL_19]], align 4
-// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, i32 0, i64 0, ptr null)
 // CHECK:         call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_14]])
 // CHECK:         br label %[[VAL_21:.*]]
 // CHECK:       taskloop.exit:                                    ; preds = %[[VAL_9]]
@@ -63,14 +61,12 @@ llvm.func @_QPtest() {
 // CHECK-LABEL: define internal void @_QPtest..omp_par
 // CHECK:       taskloop.alloca:
 // CHECK:         %[[VAL_22:.*]] = load ptr, ptr %[[VAL_23:.*]], align 8
-// CHECK:         %[[VAL_24:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 0
+// CHECK:         %[[VAL_24:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 0
 // CHECK:         %[[VAL_25:.*]] = load i64, ptr %[[VAL_24]], align 4
-// CHECK:         %[[VAL_26:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 1
+// CHECK:         %[[VAL_26:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 1
 // CHECK:         %[[VAL_27:.*]] = load i64, ptr %[[VAL_26]], align 4
-// CHECK:         %[[VAL_28:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 2
+// CHECK:         %[[VAL_28:.*]] = getelementptr { i64, i64, i64 }, ptr %[[VAL_22]], i32 0, i32 2
 // CHECK:         %[[VAL_29:.*]] = load i64, ptr %[[VAL_28]], align 4
-// CHECK:         %[[VAL_30:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_22]], i32 0, i32 3
-// CHECK:         %[[VAL_31:.*]] = load ptr, ptr %[[VAL_30]], align 8, !align !1
 // CHECK:         %[[VAL_32:.*]] = alloca i32, align 4
 // CHECK:         %[[VAL_33:.*]] = alloca i32, align 4
 // CHECK:         %[[VAL_34:.*]] = alloca i32, align 4
@@ -97,7 +93,6 @@ llvm.func @_QPtest() {
 // CHECK:       omp_loop.after:                                   ; preds = %[[VAL_51]]
 // CHECK:         br label %[[VAL_53:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = %[[VAL_52]]
-// CHECK:         tail call void @free(ptr %[[VAL_31]])
 // CHECK:         br label %[[VAL_54:.*]]
 // CHECK:       omp_loop.body:                                    ; preds = %[[VAL_48]]
 // CHECK:         %[[VAL_55:.*]] = mul i32 %[[VAL_46]], 1
@@ -114,15 +109,4 @@ llvm.func @_QPtest() {
 // CHECK:       taskloop.exit.exitStub:                           ; preds = %[[VAL_53]]
 // CHECK:         ret void
 
-// CHECK-LABEL: define internal void @omp_taskloop_dup(
-// CHECK:       entry:
-// CHECK:         %[[VAL_59:.*]] = getelementptr { %[[VAL_60:.*]], { i64, i64, i64, ptr } }, ptr %[[VAL_61:.*]], i32 0, i32 1
-// CHECK:         %[[VAL_62:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_59]], i32 0, i32 3
-// CHECK:         %[[VAL_63:.*]] = getelementptr { %[[VAL_60]], { i64, i64, i64, ptr } }, ptr %[[VAL_64:.*]], i32 0, i32 1
-// CHECK:         %[[VAL_65:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_63]], i32 0, i32 3
-// CHECK:         %[[VAL_66:.*]] = load ptr, ptr %[[VAL_65]], align 8
-// TODO: don't generate allocation for empty task context struct (for later patch)
-// CHECK:         %[[VAL_67:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({}, ptr null, i32 1) to i64))
-// CHECK:         store ptr %[[VAL_67]], ptr %[[VAL_62]], align 8
-// CHECK:         ret void
-
+// CHECK-NOT: define internal void @omp_taskloop_dup



More information about the llvm-branch-commits mailing list