[Mlir-commits] [mlir] [mlir] Translating task_reduction clause for pass-by-value vars to LLVMIR (PR #125218)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Jan 2 05:15:21 PST 2026


https://github.com/NimishMishra updated https://github.com/llvm/llvm-project/pull/125218

>From 55ecb7f882cd3deb7e58132280a79b5b023fae42 Mon Sep 17 00:00:00 2001
From: NimishMishra <neelam.nimish at gmail.com>
Date: Thu, 1 Jan 2026 23:24:28 +0530
Subject: [PATCH 1/7] Rebase and Address review comments

---
 .../mlir/Dialect/OpenMP/OpenMPClauses.td      |   8 +
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 247 +++++++++++++++++-
 2 files changed, 245 insertions(+), 10 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index 05e2ee4e5632b..a1f987553e050 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -1388,6 +1388,14 @@ class OpenMP_TaskReductionClauseSkip<
     OptionalAttr<SymbolRefArrayAttr>:$task_reduction_syms
   );
 
+  let extraClassDeclaration = [{
+    /// Returns the number of reduction variables.
+    unsigned getNumReductionVars() { return getTaskReductionVars().size(); }
+
+    /// Returns the reduction symbols.
+    auto getReductionSyms() { return getTaskReductionSyms(); }
+  }];
+
   let description = [{
     The `task_reduction` clause specifies a reduction among tasks. For each list
     item, the number of copies is unspecified. Any copies associated with the
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 03d67a52853f6..2db77c628876f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -361,11 +361,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
       result = todo("reduction with modifier");
   };
-  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
-    if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
-        op.getTaskReductionSyms())
-      result = todo("task_reduction");
-  };
   auto checkUntied = [&todo](auto op, LogicalResult &result) {
     if (op.getUntied())
       result = todo("untied");
@@ -399,10 +394,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkAllocate(op, result);
         checkInReduction(op, result);
       })
-      .Case([&](omp::TaskgroupOp op) {
-        checkAllocate(op, result);
-        checkTaskReduction(op, result);
-      })
+      .Case([&](omp::TaskgroupOp op) { checkAllocate(op, result); })
       .Case([&](omp::TaskwaitOp op) {
         checkDepend(op, result);
         checkNowait(op, result);
@@ -2469,6 +2461,220 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   return success();
 }
 
+template <typename OP>
+llvm::Value *createTaskReductionFunction(
+    llvm::IRBuilderBase &builder, const std::string &name, llvm::Type *redTy,
+    LLVM::ModuleTranslation &moduleTranslation,
+    SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, Region &region,
+    OP &op, unsigned Cnt,
+    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+    DenseMap<Value, llvm::Value *> &reductionVariableMap) {
+
+  llvm::LLVMContext &Context = builder.getContext();
+  // TODO: by-ref reduction variables are yet to be handled.
+  llvm::Type *OpaquePtrTy = llvm::PointerType::get(Context, 0);
+  if (region.empty() && name == "red_fini")
+    // Finalization is optional for reductions.
+    return llvm::Constant::getNullValue(OpaquePtrTy);
+  llvm::FunctionType *funcType =
+      llvm::FunctionType::get(OpaquePtrTy, {OpaquePtrTy, OpaquePtrTy}, false);
+  llvm::Function *function =
+      llvm::Function::Create(funcType, llvm::Function::ExternalLinkage, name,
+                             builder.GetInsertBlock()->getModule());
+  function->setDoesNotRecurse();
+  llvm::BasicBlock *entry =
+      llvm::BasicBlock::Create(Context, "entry", function);
+  llvm::IRBuilder<> bbBuilder(entry);
+
+  llvm::Value *arg0 = function->getArg(0);
+  llvm::Value *arg1 = function->getArg(1);
+
+  if (name == "red_init") {
+    function->addParamAttr(0, llvm::Attribute::NoAlias);
+    function->addParamAttr(1, llvm::Attribute::NoAlias);
+    mlir::omp::DeclareReductionOp &reduction = reductionDecls[Cnt];
+    Region &initializerRegion = reduction.getInitializerRegion();
+    Block &entry = initializerRegion.front();
+
+    mlir::Value mlirSource = op.getTaskReductionVars()[Cnt];
+    llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
+    llvm::Value *origVal = llvmSource;
+
+    moduleTranslation.mapValue(reduction.getInitializerMoldArg(), origVal);
+
+    if (entry.getNumArguments() > 1) {
+      llvm::Value *allocation =
+          reductionVariableMap.lookup(op.getReductionVars()[Cnt]);
+      moduleTranslation.mapValue(reduction.getInitializerAllocArg(),
+                                 allocation);
+    }
+
+  } else if (name == "red_comb") {
+    llvm::Value *arg0L = bbBuilder.CreateLoad(redTy, arg0);
+    llvm::Value *arg1L = bbBuilder.CreateLoad(redTy, arg1);
+    moduleTranslation.mapValue(region.front().getArgument(0), arg0L);
+    moduleTranslation.mapValue(region.front().getArgument(1), arg1L);
+  }
+  if (region.empty()) {
+    // Emit an empty function body in case of empty region
+    bbBuilder.CreateRet(arg0); // Return from the function
+    return function;
+  }
+
+  SmallVector<llvm::Value *, 1> phis;
+  if (failed(inlineConvertOmpRegions(region, "", bbBuilder, moduleTranslation,
+                                     &phis)))
+    return nullptr;
+  assert(
+      phis.size() == 1 &&
+      "expected one value to be yielded from the reduction declaration region");
+  bbBuilder.CreateStore(phis[0], arg0);
+  bbBuilder.CreateRet(arg0); // Return from the function
+  return function;
+}
+
+void emitTaskRedInitCall(
+    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
+    const llvm::OpenMPIRBuilder::LocationDescription &ompLoc, int arraySize,
+    llvm::Value *ArrayAlloca) {
+
+  llvm::LLVMContext &Context = builder.getContext();
+  uint32_t SrcLocStrSize;
+  llvm::Constant *SrcLocStr =
+      moduleTranslation.getOpenMPBuilder()->getOrCreateSrcLocStr(ompLoc,
+                                                                 SrcLocStrSize);
+  llvm::Value *Ident = moduleTranslation.getOpenMPBuilder()->getOrCreateIdent(
+      SrcLocStr, SrcLocStrSize);
+  llvm::Value *ThreadID =
+      moduleTranslation.getOpenMPBuilder()->getOrCreateThreadID(Ident);
+  llvm::Constant *ConstInt =
+      llvm::ConstantInt::get(llvm::Type::getInt32Ty(Context), arraySize);
+
+  llvm::Function *TaskRedInitFn =
+      moduleTranslation.getOpenMPBuilder()->getOrCreateRuntimeFunctionPtr(
+          llvm::omp::OMPRTL___kmpc_taskred_init);
+  builder.CreateCall(TaskRedInitFn, {ThreadID, ConstInt, ArrayAlloca});
+}
+
+template <typename OP>
+static LogicalResult allocAndInitializeTaskReductionVars(
+    OP op, ArrayRef<BlockArgument> reductionArgs, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation,
+    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+    SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+    SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+    DenseMap<Value, llvm::Value *> &reductionVariableMap) {
+
+  if (op.getNumReductionVars() == 0)
+    return success();
+
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::LLVMContext &Context = builder.getContext();
+  SmallVector<DeferredStore> deferredStores;
+
+  // Save the current insertion point
+  auto oldIP = builder.saveIP();
+
+  // Set insertion point after the allocations
+  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+
+  // Define the kmp_taskred_input_t structure
+  llvm::StructType *kmp_taskred_input_t =
+      llvm::StructType::create(Context, "kmp_taskred_input_t");
+  llvm::Type *OpaquePtrTy = llvm::PointerType::get(Context, 0); // void*
+  llvm::Type *SizeTy = builder.getInt64Ty(); // size_t (assumed to be i64)
+  llvm::Type *FlagsTy = llvm::Type::getInt32Ty(Context); // flags (i32)
+
+  // Structure members
+  std::vector<llvm::Type *> structMembers = {
+      OpaquePtrTy, // reduce_shar (void*)
+      OpaquePtrTy, // reduce_orig (void*)
+      SizeTy,      // reduce_size (size_t)
+      OpaquePtrTy, // reduce_init (void*)
+      OpaquePtrTy, // reduce_fini (void*)
+      OpaquePtrTy, // reduce_comb (void*)
+      FlagsTy      // flags (i32)
+  };
+
+  kmp_taskred_input_t->setBody(structMembers);
+  int arraySize = op.getTaskReductionVars().size();
+  llvm::ArrayType *ArrayTy =
+      llvm::ArrayType::get(kmp_taskred_input_t, arraySize);
+
+  // Allocate the array for kmp_taskred_input_t
+  llvm::AllocaInst *ArrayAlloca =
+      builder.CreateAlloca(ArrayTy, nullptr, "kmp_taskred_array");
+
+  // Restore the insertion point
+  builder.restoreIP(oldIP);
+  llvm::DataLayout DL = builder.GetInsertBlock()->getModule()->getDataLayout();
+
+  for (int Cnt = 0; Cnt < arraySize; ++Cnt) {
+    llvm::Value *shared =
+        moduleTranslation.lookupValue(op.getTaskReductionVars()[Cnt]);
+
+    // Create a GEP to access the reduction element
+    llvm::Value *StructPtr = builder.CreateGEP(
+        ArrayTy, ArrayAlloca, {builder.getInt32(0), builder.getInt32(Cnt)},
+        "red_element");
+
+    llvm::Value *FieldPtrReduceShar = builder.CreateStructGEP(
+        kmp_taskred_input_t, StructPtr, 0, "reduce_shar");
+    builder.CreateStore(shared, FieldPtrReduceShar);
+
+    llvm::Value *FieldPtrReduceOrig = builder.CreateStructGEP(
+        kmp_taskred_input_t, StructPtr, 1, "reduce_orig");
+    builder.CreateStore(shared, FieldPtrReduceOrig);
+
+    // Store size of the reduction variable
+    llvm::Value *FieldPtrReduceSize = builder.CreateStructGEP(
+        kmp_taskred_input_t, StructPtr, 2, "reduce_size");
+    llvm::Type *redTy =
+        moduleTranslation.convertType(reductionDecls[Cnt].getType());
+    uint64_t sizeInBytes = DL.getTypeAllocSize(redTy);
+    llvm::ConstantInt *sizeConst =
+        llvm::ConstantInt::get(llvm::Type::getInt64Ty(Context), sizeInBytes);
+    builder.CreateStore(sizeConst, FieldPtrReduceSize);
+
+    // Initialize reduction variable
+    llvm::Value *FieldPtrReduceInit = builder.CreateStructGEP(
+        kmp_taskred_input_t, StructPtr, 3, "reduce_init");
+    llvm::Value *initFunction = createTaskReductionFunction(
+        builder, "red_init", redTy, moduleTranslation, reductionDecls,
+        reductionDecls[Cnt].getInitializerRegion(), op, Cnt,
+        privateReductionVariables, reductionVariableMap);
+    builder.CreateStore(initFunction, FieldPtrReduceInit);
+
+    // Create finish and combine functions
+    llvm::Value *FieldPtrReduceFini = builder.CreateStructGEP(
+        kmp_taskred_input_t, StructPtr, 4, "reduce_fini");
+    llvm::Value *finiFunction = createTaskReductionFunction(
+        builder, "red_fini", redTy, moduleTranslation, reductionDecls,
+        reductionDecls[Cnt].getCleanupRegion(), op, Cnt,
+        privateReductionVariables, reductionVariableMap);
+    builder.CreateStore(finiFunction, FieldPtrReduceFini);
+
+    llvm::Value *FieldPtrReduceComb = builder.CreateStructGEP(
+        kmp_taskred_input_t, StructPtr, 5, "reduce_comb");
+    llvm::Value *combFunction = createTaskReductionFunction(
+        builder, "red_comb", redTy, moduleTranslation, reductionDecls,
+        reductionDecls[Cnt].getReductionRegion(), op, Cnt,
+        privateReductionVariables, reductionVariableMap);
+    builder.CreateStore(combFunction, FieldPtrReduceComb);
+
+    llvm::Value *FieldPtrFlags =
+        builder.CreateStructGEP(kmp_taskred_input_t, StructPtr, 6, "flags");
+    llvm::ConstantInt *flagVal =
+        llvm::ConstantInt::get(llvm::Type::getInt64Ty(Context), 0);
+    builder.CreateStore(flagVal, FieldPtrFlags);
+  }
+
+  // Emit the runtime call
+  emitTaskRedInitCall(builder, moduleTranslation, ompLoc, arraySize,
+                      ArrayAlloca);
+  return success();
+}
+
 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
@@ -2477,8 +2683,29 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
   if (failed(checkImplementationStatus(*tgOp)))
     return failure();
 
+  SmallVector<omp::DeclareReductionOp> reductionDecls;
+  SmallVector<llvm::Value *> privateReductionVariables(
+      tgOp.getNumReductionVars());
+  DenseMap<Value, llvm::Value *> reductionVariableMap;
+  MutableArrayRef<BlockArgument> reductionArgs =
+      tgOp.getRegion().getArguments();
+  LogicalResult bodyGenStatus = success();
+
+  std::optional<ArrayAttr> attr = tgOp.getTaskReductionSyms();
+  assert(attr && "Missing task reduction symbols");
+  reductionDecls.reserve(reductionDecls.size() + tgOp.getNumReductionVars());
+  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
+    reductionDecls.push_back(
+        SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
+            tgOp, symbolRef));
+  }
   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
     builder.restoreIP(codegenIP);
+
+    if (failed(allocAndInitializeTaskReductionVars(
+            tgOp, reductionArgs, builder, moduleTranslation, allocaIP,
+            reductionDecls, privateReductionVariables, reductionVariableMap)))
+      bodyGenStatus = failure();
     return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
                                builder, moduleTranslation)
         .takeError();
@@ -2494,7 +2721,7 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
     return failure();
 
   builder.restoreIP(*afterIP);
-  return success();
+  return bodyGenStatus;
 }
 
 static LogicalResult

>From 3e1e9f50b76d2b8a09bd486dc01699a5d3120586 Mon Sep 17 00:00:00 2001
From: NimishMishra <neelam.nimish at gmail.com>
Date: Fri, 2 Jan 2026 12:36:02 +0530
Subject: [PATCH 2/7] Add tests and address review comments

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  67 +++++--
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 189 ++++++++++++++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  52 ++---
 3 files changed, 262 insertions(+), 46 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 2db77c628876f..565c9e9601de3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -361,6 +361,15 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
       result = todo("reduction with modifier");
   };
+  auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
+    if (op.getTaskReductionByref()) {
+      llvm::ArrayRef<bool> ByrefAttrs = op.getTaskReductionByref().value();
+      for (bool ByrefAttr : ByrefAttrs) {
+        if (ByrefAttr)
+          result = todo("task_reduction with pass by reference argument");
+      }
+    }
+  };
   auto checkUntied = [&todo](auto op, LogicalResult &result) {
     if (op.getUntied())
       result = todo("untied");
@@ -394,7 +403,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkAllocate(op, result);
         checkInReduction(op, result);
       })
-      .Case([&](omp::TaskgroupOp op) { checkAllocate(op, result); })
+      .Case([&](omp::TaskgroupOp op) {
+        checkAllocate(op, result);
+        checkTaskReduction(op, result);
+      })
       .Case([&](omp::TaskwaitOp op) {
         checkDepend(op, result);
         checkNowait(op, result);
@@ -2462,11 +2474,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 }
 
 template <typename OP>
-llvm::Value *createTaskReductionFunction(
+static llvm::Value *createTaskReductionFunction(
     llvm::IRBuilderBase &builder, const std::string &name, llvm::Type *redTy,
     LLVM::ModuleTranslation &moduleTranslation,
     SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, Region &region,
-    OP &op, unsigned Cnt,
+    OP &op, unsigned cnt,
     SmallVectorImpl<llvm::Value *> &privateReductionVariables,
     DenseMap<Value, llvm::Value *> &reductionVariableMap) {
 
@@ -2476,6 +2488,8 @@ llvm::Value *createTaskReductionFunction(
   if (region.empty() && name == "red_fini")
     // Finalization is optional for reductions.
     return llvm::Constant::getNullValue(OpaquePtrTy);
+
+  // Prepare a general structure of the function to be emitted
   llvm::FunctionType *funcType =
       llvm::FunctionType::get(OpaquePtrTy, {OpaquePtrTy, OpaquePtrTy}, false);
   llvm::Function *function =
@@ -2486,17 +2500,20 @@ llvm::Value *createTaskReductionFunction(
       llvm::BasicBlock::Create(Context, "entry", function);
   llvm::IRBuilder<> bbBuilder(entry);
 
+  // Prepare the function arguments
   llvm::Value *arg0 = function->getArg(0);
   llvm::Value *arg1 = function->getArg(1);
 
   if (name == "red_init") {
+    // For the initialization, map the reduction variables
+    // to the arguments of the function
     function->addParamAttr(0, llvm::Attribute::NoAlias);
     function->addParamAttr(1, llvm::Attribute::NoAlias);
-    mlir::omp::DeclareReductionOp &reduction = reductionDecls[Cnt];
+    mlir::omp::DeclareReductionOp &reduction = reductionDecls[cnt];
     Region &initializerRegion = reduction.getInitializerRegion();
     Block &entry = initializerRegion.front();
 
-    mlir::Value mlirSource = op.getTaskReductionVars()[Cnt];
+    mlir::Value mlirSource = op.getTaskReductionVars()[cnt];
     llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
     llvm::Value *origVal = llvmSource;
 
@@ -2504,22 +2521,25 @@ llvm::Value *createTaskReductionFunction(
 
     if (entry.getNumArguments() > 1) {
       llvm::Value *allocation =
-          reductionVariableMap.lookup(op.getReductionVars()[Cnt]);
+          reductionVariableMap.lookup(op.getTaskReductionVars()[cnt]);
       moduleTranslation.mapValue(reduction.getInitializerAllocArg(),
                                  allocation);
     }
 
   } else if (name == "red_comb") {
+    // For the combiner, perform a load for each argument
+    // and map it to the combiner region.
     llvm::Value *arg0L = bbBuilder.CreateLoad(redTy, arg0);
     llvm::Value *arg1L = bbBuilder.CreateLoad(redTy, arg1);
     moduleTranslation.mapValue(region.front().getArgument(0), arg0L);
     moduleTranslation.mapValue(region.front().getArgument(1), arg1L);
   }
-  if (region.empty()) {
-    // Emit an empty function body in case of empty region
+
+  // An empty region gets a trivial body that only returns arg0 (braces needed).
+  if (region.empty()) {
     bbBuilder.CreateRet(arg0); // Return from the function
     return function;
   }
 
   SmallVector<llvm::Value *, 1> phis;
   if (failed(inlineConvertOmpRegions(region, "", bbBuilder, moduleTranslation,
@@ -2609,13 +2629,13 @@ static LogicalResult allocAndInitializeTaskReductionVars(
   builder.restoreIP(oldIP);
   llvm::DataLayout DL = builder.GetInsertBlock()->getModule()->getDataLayout();
 
-  for (int Cnt = 0; Cnt < arraySize; ++Cnt) {
+  for (int cnt = 0; cnt < arraySize; ++cnt) {
     llvm::Value *shared =
-        moduleTranslation.lookupValue(op.getTaskReductionVars()[Cnt]);
+        moduleTranslation.lookupValue(op.getTaskReductionVars()[cnt]);
 
     // Create a GEP to access the reduction element
     llvm::Value *StructPtr = builder.CreateGEP(
-        ArrayTy, ArrayAlloca, {builder.getInt32(0), builder.getInt32(Cnt)},
+        ArrayTy, ArrayAlloca, {builder.getInt32(0), builder.getInt32(cnt)},
         "red_element");
 
     llvm::Value *FieldPtrReduceShar = builder.CreateStructGEP(
@@ -2630,7 +2650,7 @@ static LogicalResult allocAndInitializeTaskReductionVars(
     llvm::Value *FieldPtrReduceSize = builder.CreateStructGEP(
         kmp_taskred_input_t, StructPtr, 2, "reduce_size");
     llvm::Type *redTy =
-        moduleTranslation.convertType(reductionDecls[Cnt].getType());
+        moduleTranslation.convertType(reductionDecls[cnt].getType());
     uint64_t sizeInBytes = DL.getTypeAllocSize(redTy);
     llvm::ConstantInt *sizeConst =
         llvm::ConstantInt::get(llvm::Type::getInt64Ty(Context), sizeInBytes);
@@ -2641,7 +2661,7 @@ static LogicalResult allocAndInitializeTaskReductionVars(
         kmp_taskred_input_t, StructPtr, 3, "reduce_init");
     llvm::Value *initFunction = createTaskReductionFunction(
         builder, "red_init", redTy, moduleTranslation, reductionDecls,
-        reductionDecls[Cnt].getInitializerRegion(), op, Cnt,
+        reductionDecls[cnt].getInitializerRegion(), op, cnt,
         privateReductionVariables, reductionVariableMap);
     builder.CreateStore(initFunction, FieldPtrReduceInit);
 
@@ -2650,7 +2670,7 @@ static LogicalResult allocAndInitializeTaskReductionVars(
         kmp_taskred_input_t, StructPtr, 4, "reduce_fini");
     llvm::Value *finiFunction = createTaskReductionFunction(
         builder, "red_fini", redTy, moduleTranslation, reductionDecls,
-        reductionDecls[Cnt].getCleanupRegion(), op, Cnt,
+        reductionDecls[cnt].getCleanupRegion(), op, cnt,
         privateReductionVariables, reductionVariableMap);
     builder.CreateStore(finiFunction, FieldPtrReduceFini);
 
@@ -2658,7 +2678,7 @@ static LogicalResult allocAndInitializeTaskReductionVars(
         kmp_taskred_input_t, StructPtr, 5, "reduce_comb");
     llvm::Value *combFunction = createTaskReductionFunction(
         builder, "red_comb", redTy, moduleTranslation, reductionDecls,
-        reductionDecls[Cnt].getReductionRegion(), op, Cnt,
+        reductionDecls[cnt].getReductionRegion(), op, cnt,
         privateReductionVariables, reductionVariableMap);
     builder.CreateStore(combFunction, FieldPtrReduceComb);
 
@@ -2692,12 +2712,15 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
   LogicalResult bodyGenStatus = success();
 
   std::optional<ArrayAttr> attr = tgOp.getTaskReductionSyms();
-  assert(attr && "Missing task reduction symbols");
-  reductionDecls.reserve(reductionDecls.size() + tgOp.getNumReductionVars());
-  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
-    reductionDecls.push_back(
-        SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
-            tgOp, symbolRef));
+  if (attr) {
+    reductionDecls.reserve(reductionDecls.size() + tgOp.getNumReductionVars());
+    for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
+      reductionDecls.push_back(
+          SymbolTable::lookupNearestSymbolFrom<omp::DeclareReductionOp>(
+              tgOp, symbolRef));
+    }
+    assert(reductionDecls.size() == tgOp.getNumReductionVars() &&
+           "Missing reduction declaration");
   }
   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
     builder.restoreIP(codegenIP);
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 1eb501ca02703..a4165837d9df7 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3074,6 +3074,195 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
 
 // -----
 
+// CHECK-LABEL: define void @_QPtaskred_integer_arg() {
+// CHECK: %[[VAR:.*]] = alloca i32, i64 1, align 4
+// CHECK: %kmp_taskred_array = alloca [1 x %kmp_taskred_input_t], align 8
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID]])
+// CHECK: %[[RED_ELEMENT:.*]] = getelementptr [1 x %kmp_taskred_input_t], ptr %kmp_taskred_array, i32 0, i32 0
+// CHECK: %[[REDUCE_SHAR:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 0
+// CHECK: store ptr %[[VAR]], ptr %[[REDUCE_SHAR]], align 8
+// CHECK: %[[REDUCE_ORIG:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 1
+// CHECK: store ptr %[[VAR]], ptr %[[REDUCE_ORIG]], align 8
+// CHECK: %[[REDUCE_SIZE:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 2
+// CHECK: store i64 4, ptr %[[REDUCE_SIZE]], align 4
+// CHECK: %[[REDUCE_INIT:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 3
+// CHECK: store ptr @red_init, ptr %[[REDUCE_INIT]], align 8
+// CHECK: %[[REDUCE_FINI:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 4
+// CHECK: store ptr null, ptr %[[REDUCE_FINI]], align 8
+// CHECK: %[[REDUCE_COMB:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 5
+// CHECK: store ptr @red_comb, ptr %[[REDUCE_COMB]], align 8
+// CHECK: %[[FLAGS:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 6
+// CHECK: store i64 0, ptr %[[FLAGS]], align 4
+// CHECK: %omp_global_thread_num1 = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: %[[INIT:.*]] = call ptr @__kmpc_taskred_init(i32 %omp_global_thread_num1, i32 1, ptr %kmp_taskred_array)
+
+// CHECK: define ptr @red_init(ptr noalias %0, ptr noalias %1) #2 {
+// CHECK: entry:
+// CHECK: store i32 0, ptr %0, align 4
+// CHECK: ret ptr %0
+// CHECK: }
+
+// CHECK: ; Function Attrs: norecurse
+// CHECK: define ptr @red_comb(ptr %0, ptr %1) #2 {
+// CHECK: entry:
+// CHECK: %2 = load i32, ptr %0, align 4
+// CHECK: %3 = load i32, ptr %1, align 4
+// CHECK: %4 = add i32 %2, %3
+// CHECK: store i32 %4, ptr %0, align 4
+// CHECK: ret ptr %0
+// CHECK: }
+
+omp.declare_reduction @add_reduction_i32 : i32 init {
+^bb0(%arg0: i32):
+  %0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%0 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %0 = llvm.add %arg0, %arg1 : i32
+  omp.yield(%0 : i32)
+}
+llvm.func @_QPtaskred_integer_arg() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "s"} : (i64) -> !llvm.ptr
+  omp.taskgroup task_reduction(@add_reduction_i32 %1 -> %arg0 : !llvm.ptr) {
+  omp.terminator
+  }
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @_QPfloat_arg() {
+// CHECK: %[[VAR:.*]] = alloca float, i64 1, align 4
+// CHECK: %kmp_taskred_array = alloca [1 x %kmp_taskred_input_t], align 8
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID]])
+// CHECK: %[[RED_ELEMENT:.*]] = getelementptr [1 x %kmp_taskred_input_t], ptr %kmp_taskred_array, i32 0, i32 0
+// CHECK: %[[REDUCE_SHAR:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 0
+// CHECK: store ptr %[[VAR]], ptr %[[REDUCE_SHAR]], align 8
+// CHECK: %[[REDUCE_ORIG:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 1
+// CHECK: store ptr %[[VAR]], ptr %[[REDUCE_ORIG]], align 8
+// CHECK: %[[REDUCE_SIZE:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 2
+// CHECK: store i64 4, ptr %[[REDUCE_SIZE]], align 4
+// CHECK: %[[REDUCE_INIT:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 3
+// CHECK: store ptr @red_init, ptr %[[REDUCE_INIT]], align 8
+// CHECK: %[[REDUCE_FINI:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 4
+// CHECK: store ptr null, ptr %[[REDUCE_FINI]], align 8
+// CHECK: %[[REDUCE_COMB:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 5
+// CHECK: store ptr @red_comb, ptr %[[REDUCE_COMB]], align 8
+// CHECK: %[[FLAGS:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 6
+// CHECK: store i64 0, ptr %[[FLAGS]], align 4
+// CHECK: %omp_global_thread_num1 = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: %[[INIT:.*]] = call ptr @__kmpc_taskred_init(i32 %omp_global_thread_num1, i32 1, ptr %kmp_taskred_array)
+
+
+// CHECK: ; Function Attrs: norecurse
+// CHECK: define ptr @red_init(ptr noalias %0, ptr noalias %1) #2 {
+// CHECK: entry:
+// CHECK: store float 1.000000e+00, ptr %0, align 4
+// CHECK: ret ptr %0
+// CHECK: }
+
+// CHECK: ; Function Attrs: norecurse
+// CHECK: define ptr @red_comb(ptr %0, ptr %1) #2 {
+// CHECK: entry:
+// CHECK: %2 = load float, ptr %0, align 4
+// CHECK: %3 = load float, ptr %1, align 4
+// CHECK: %4 = fmul contract float %2, %3
+// CHECK: store float %4, ptr %0, align 4
+// CHECK: ret ptr %0
+// CHECK: }
+
+
+omp.declare_reduction @multiply_reduction_f32 : f32 init {
+^bb0(%arg0: f32):
+  %0 = llvm.mlir.constant(1.000000e+00 : f32) : f32
+  omp.yield(%0 : f32)
+} combiner {
+^bb0(%arg0: f32, %arg1: f32):
+  %0 = llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath<contract>} : f32
+  omp.yield(%0 : f32)
+}
+llvm.func @_QPfloat_arg() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x f32 {bindc_name = "s"} : (i64) -> !llvm.ptr
+  omp.taskgroup task_reduction(@multiply_reduction_f32 %1 -> %arg0 : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @_QPlogical_reduction() {
+// CHECK: %[[VAR:.*]] = alloca i32, i64 1, align 4
+// CHECK: %kmp_taskred_array = alloca [1 x %kmp_taskred_input_t], align 8
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID]])
+// CHECK: %[[RED_ELEMENT:.*]] = getelementptr [1 x %kmp_taskred_input_t], ptr %kmp_taskred_array, i32 0, i32 0
+// CHECK: %[[REDUCE_SHAR:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 0
+// CHECK: store ptr %[[VAR]], ptr %[[REDUCE_SHAR]], align 8
+// CHECK: %[[REDUCE_ORIG:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 1
+// CHECK: store ptr %[[VAR]], ptr %[[REDUCE_ORIG]], align 8
+// CHECK: %[[REDUCE_SIZE:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 2
+// CHECK: store i64 4, ptr %[[REDUCE_SIZE]], align 4
+// CHECK: %[[REDUCE_INIT:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 3
+// CHECK: store ptr @red_init, ptr %[[REDUCE_INIT]], align 8
+// CHECK: %[[REDUCE_FINI:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 4
+// CHECK: store ptr null, ptr %[[REDUCE_FINI]], align 8
+// CHECK: %[[REDUCE_COMB:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 5
+// CHECK: store ptr @red_comb, ptr %[[REDUCE_COMB]], align 8
+// CHECK: %[[FLAGS:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 6
+// CHECK: store i64 0, ptr %[[FLAGS]], align 4
+// CHECK: %omp_global_thread_num1 = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: %[[INIT:.*]] = call ptr @__kmpc_taskred_init(i32 %omp_global_thread_num1, i32 1, ptr %kmp_taskred_array)
+
+
+// CHECK: define ptr @red_init(ptr noalias %0, ptr noalias %1) #2 {
+// CHECK: entry:
+// CHECK: store i32 1, ptr %0, align 4
+// CHECK: ret ptr %0
+// CHECK: }
+
+// CHECK: ; Function Attrs: norecurse
+// CHECK: define ptr @red_comb(ptr %0, ptr %1) #2 {
+// CHECK: entry:
+// CHECK: %2 = load i32, ptr %0, align 4
+// CHECK: %3 = load i32, ptr %1, align 4
+// CHECK: %4 = icmp ne i32 %2, 0
+// CHECK: %5 = icmp ne i32 %3, 0
+// CHECK: %6 = and i1 %4, %5
+// CHECK: %7 = zext i1 %6 to i32
+// CHECK: store i32 %7, ptr %0, align 4
+// CHECK: ret ptr %0
+// CHECK: }
+
+
+omp.declare_reduction @and_reduction_l32 : i32 init {
+^bb0(%arg0: i32):
+  %0 = llvm.mlir.constant(1 : i64) : i32
+  omp.yield(%0 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %0 = llvm.mlir.constant(0 : i64) : i32
+  %1 = llvm.icmp "ne" %arg0, %0 : i32
+  %2 = llvm.icmp "ne" %arg1, %0 : i32
+  %3 = llvm.and %1, %2 : i1
+  %4 = llvm.zext %3 : i1 to i32
+  omp.yield(%4 : i32)
+}
+llvm.func @_QPlogical_reduction() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "s"} : (i64) -> !llvm.ptr
+  omp.taskgroup task_reduction(@and_reduction_l32 %1 -> %arg0 : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// -----
+
 llvm.func @test_01() attributes {sym_visibility = "private"}
 llvm.func @test_02() attributes {sym_visibility = "private"}
 // CHECK-LABEL: define void @_QPomp_task_priority() {
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 396c57af81c44..f240ab6d39227 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -295,31 +295,35 @@ llvm.func @taskgroup_allocate(%x : !llvm.ptr) {
 
 // -----
 
-omp.declare_reduction @add_f32 : f32
-init {
-^bb0(%arg: f32):
-  %0 = llvm.mlir.constant(0.0 : f32) : f32
-  omp.yield (%0 : f32)
-}
-combiner {
-^bb1(%arg0: f32, %arg1: f32):
-  %1 = llvm.fadd %arg0, %arg1 : f32
-  omp.yield (%1 : f32)
-}
-atomic {
-^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
-  %2 = llvm.load %arg3 : !llvm.ptr -> f32
-  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
-  omp.yield
-}
-llvm.func @taskgroup_task_reduction(%x : !llvm.ptr) {
-  // expected-error at below {{not yet implemented: Unhandled clause task_reduction in omp.taskgroup operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskgroup}}
-  omp.taskgroup task_reduction(@add_f32 %x -> %prv : !llvm.ptr) {
-    omp.terminator
-  }
+omp.declare_reduction @add_reduction_byref_i32 : !llvm.ptr alloc {
+   %0 = llvm.mlir.constant(1 : i64) : i64
+   %1 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr
+   %2 = llvm.mlir.constant(1 : i64) : i64
+   omp.yield(%1 : !llvm.ptr)
+} init {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+   %0 = llvm.mlir.constant(0 : i32) : i32
+   llvm.store %0, %arg1 : i32, !llvm.ptr
+   omp.yield(%arg1 : !llvm.ptr)
+} combiner {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+   %0 = llvm.load %arg0 : !llvm.ptr -> i32
+   %1 = llvm.load %arg1 : !llvm.ptr -> i32
+   %2 = llvm.add %0, %1 : i32
+   llvm.store %2, %arg0 : i32, !llvm.ptr
+   omp.yield(%arg0 : !llvm.ptr)
+}
+llvm.func @_QPtask_reduction_byref() {
+   %0 = llvm.mlir.constant(1 : i64) : i64
+   %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
+   %2 = llvm.mlir.constant(1 : i64) : i64
+   // expected-error at below {{not yet implemented: Unhandled clause task_reduction with pass by reference argument in omp.taskgroup operation}}
+   // expected-error at below {{LLVM Translation failed for operation: omp.taskgroup}}
+   omp.taskgroup task_reduction(byref @add_reduction_byref_i32 %1 -> %arg0 : !llvm.ptr) {
+     omp.terminator
+   }
   llvm.return
-}
+} 
 
 // -----
 

>From c17469d2067a683a6f76591ebe9e0f92100980f9 Mon Sep 17 00:00:00 2001
From: NimishMishra <neelam.nimish at gmail.com>
Date: Fri, 2 Jan 2026 12:42:07 +0530
Subject: [PATCH 3/7] Format

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 565c9e9601de3..c33d5bf3fa083 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2538,19 +2538,19 @@ static llvm::Value *createTaskReductionFunction(
   // Emit an empty function body in case of empty region
   if (region.empty())
     bbBuilder.CreateRet(arg0); // Return from the function
-    return function;
+  return function;
 }
 
-  SmallVector<llvm::Value *, 1> phis;
-  if (failed(inlineConvertOmpRegions(region, "", bbBuilder, moduleTranslation,
-                                     &phis)))
-    return nullptr;
-  assert(
-      phis.size() == 1 &&
-      "expected one value to be yielded from the reduction declaration region");
-  bbBuilder.CreateStore(phis[0], arg0);
-  bbBuilder.CreateRet(arg0); // Return from the function
-  return function;
+SmallVector<llvm::Value *, 1> phis;
+if (failed(inlineConvertOmpRegions(region, "", bbBuilder, moduleTranslation,
+                                   &phis)))
+  return nullptr;
+assert(
+    phis.size() == 1 &&
+    "expected one value to be yielded from the reduction declaration region");
+bbBuilder.CreateStore(phis[0], arg0);
+bbBuilder.CreateRet(arg0); // Return from the function
+return function;
 }
 
 void emitTaskRedInitCall(

>From 743d4cd6d0b303052f5e4f85f120d86b593163da Mon Sep 17 00:00:00 2001
From: NimishMishra <neelam.nimish at gmail.com>
Date: Fri, 2 Jan 2026 12:50:23 +0530
Subject: [PATCH 4/7] Format

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c33d5bf3fa083..5cc8093bc806e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2539,18 +2539,17 @@ static llvm::Value *createTaskReductionFunction(
   if (region.empty())
     bbBuilder.CreateRet(arg0); // Return from the function
   return function;
-}
 
-SmallVector<llvm::Value *, 1> phis;
-if (failed(inlineConvertOmpRegions(region, "", bbBuilder, moduleTranslation,
-                                   &phis)))
-  return nullptr;
-assert(
-    phis.size() == 1 &&
-    "expected one value to be yielded from the reduction declaration region");
-bbBuilder.CreateStore(phis[0], arg0);
-bbBuilder.CreateRet(arg0); // Return from the function
-return function;
+  SmallVector<llvm::Value *, 1> phis;
+  if (failed(inlineConvertOmpRegions(region, "", bbBuilder, moduleTranslation,
+                                     &phis)))
+    return nullptr;
+  assert(
+      phis.size() == 1 &&
+      "expected one value to be yielded from the reduction declaration region");
+  bbBuilder.CreateStore(phis[0], arg0);
+  bbBuilder.CreateRet(arg0); // Return from the function
+  return function;
 }
 
 void emitTaskRedInitCall(

>From dd24d8d809b7b13b6c2b274aec8b4ab3d1cffcb0 Mon Sep 17 00:00:00 2001
From: NimishMishra <neelam.nimish at gmail.com>
Date: Fri, 2 Jan 2026 13:04:30 +0530
Subject: [PATCH 5/7] Fix test

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5cc8093bc806e..c7bbfeda85439 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2536,9 +2536,10 @@ static llvm::Value *createTaskReductionFunction(
   }
 
   // Emit an empty function body in case of empty region
-  if (region.empty())
+  if (region.empty()) {
     bbBuilder.CreateRet(arg0); // Return from the function
-  return function;
+    return function;
+  }
 
   SmallVector<llvm::Value *, 1> phis;
   if (failed(inlineConvertOmpRegions(region, "", bbBuilder, moduleTranslation,

>From 31d9c847bd088522860d6d8901e8396f51227a59 Mon Sep 17 00:00:00 2001
From: NimishMishra <neelam.nimish at gmail.com>
Date: Fri, 2 Jan 2026 18:33:50 +0530
Subject: [PATCH 6/7] Address review comments

---
 .../mlir/Dialect/OpenMP/OpenMPClauses.td      |  8 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 78 +++++++++++--------
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  6 +-
 3 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index a1f987553e050..95e1f607d7d51 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -1389,11 +1389,11 @@ class OpenMP_TaskReductionClauseSkip<
   );
 
   let extraClassDeclaration = [{
-        /// Returns the number of reduction variables.
-	unsigned getNumReductionVars() { return getTaskReductionVars().size(); }
+    /// Returns the number of reduction variables.
+    unsigned getNumReductionVars() { return getTaskReductionVars().size(); }
 
-        /// Returns the reduction symbols
-	auto getReductionSyms() { return getTaskReductionSyms(); }
+    /// Returns the reduction symbols
+    auto getReductionSyms() { return getTaskReductionSyms(); }
   }];
 
   let description = [{
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c7bbfeda85439..f76cb433bf20c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2473,18 +2473,29 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   return success();
 }
 
-template <typename OP>
+/*
+ * Utility function for translating `red_init`, `red_comb`, and `red_fini` to
+ * LLVMIR. The utility first (commonly) generates a skeleton for any of the
+ * three functions, and then generates the function body based on the
+ * specific operations involved in `red_init` (codegen related to initialization
+ * of task reduction variables) and `red_comb` (codegen related to combination).
+ * Currently, codegen for `red_fini` is skipped since finalization is optional
+ * for `task_reduction` clause, but this utility has the capability of defining
+ * finalization if needed. Finally, the returned `llvm::Function` is used to
+ * populate the relevant entries in the task reduction specific data structure.
+ */
 static llvm::Value *createTaskReductionFunction(
-    llvm::IRBuilderBase &builder, const std::string &name, llvm::Type *redTy,
-    LLVM::ModuleTranslation &moduleTranslation,
-    SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls, Region &region,
-    OP &op, unsigned cnt,
+    omp::TaskgroupOp &op, llvm::IRBuilderBase &builder, const std::string &name,
+    llvm::Type *redTy, LLVM::ModuleTranslation &moduleTranslation,
+    omp::DeclareReductionOp &reductionDecl, Region &region, unsigned cnt,
     SmallVectorImpl<llvm::Value *> &privateReductionVariables,
     DenseMap<Value, llvm::Value *> &reductionVariableMap) {
 
   llvm::LLVMContext &Context = builder.getContext();
   // TODO: by-ref reduction variables are yet to be handled.
-  llvm::Type *OpaquePtrTy = llvm::PointerType::get(Context, 0);
+  llvm::DataLayout DL = builder.GetInsertBlock()->getModule()->getDataLayout();
+  llvm::Type *OpaquePtrTy =
+      llvm::PointerType::get(Context, DL.getProgramAddressSpace());
   if (region.empty() && name == "red_fini")
     // Finalization is optional for reductions.
     return llvm::Constant::getNullValue(OpaquePtrTy);
@@ -2509,20 +2520,19 @@ static llvm::Value *createTaskReductionFunction(
     // to the arguments of the function
     function->addParamAttr(0, llvm::Attribute::NoAlias);
     function->addParamAttr(1, llvm::Attribute::NoAlias);
-    mlir::omp::DeclareReductionOp &reduction = reductionDecls[cnt];
-    Region &initializerRegion = reduction.getInitializerRegion();
+    Region &initializerRegion = reductionDecl.getInitializerRegion();
     Block &entry = initializerRegion.front();
 
     mlir::Value mlirSource = op.getTaskReductionVars()[cnt];
     llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource);
     llvm::Value *origVal = llvmSource;
 
-    moduleTranslation.mapValue(reduction.getInitializerMoldArg(), origVal);
+    moduleTranslation.mapValue(reductionDecl.getInitializerMoldArg(), origVal);
 
     if (entry.getNumArguments() > 1) {
       llvm::Value *allocation =
-          reductionVariableMap.lookup(op.getReductionVars()[cnt]);
-      moduleTranslation.mapValue(reduction.getInitializerAllocArg(),
+          reductionVariableMap.lookup(op.getTaskReductionVars()[cnt]);
+      moduleTranslation.mapValue(reductionDecl.getInitializerAllocArg(),
                                  allocation);
     }
 
@@ -2553,10 +2563,11 @@ static llvm::Value *createTaskReductionFunction(
   return function;
 }
 
-void emitTaskRedInitCall(
-    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
-    const llvm::OpenMPIRBuilder::LocationDescription &ompLoc, int arraySize,
-    llvm::Value *ArrayAlloca) {
+static void
+emitTaskRedInitCall(llvm::IRBuilderBase &builder,
+                    LLVM::ModuleTranslation &moduleTranslation,
+                    const llvm::OpenMPIRBuilder::LocationDescription &ompLoc,
+                    int arraySize, llvm::Value *ArrayAlloca) {
 
   llvm::LLVMContext &Context = builder.getContext();
   uint32_t SrcLocStrSize;
@@ -2590,19 +2601,15 @@ static LogicalResult allocAndInitializeTaskReductionVars(
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::LLVMContext &Context = builder.getContext();
-  SmallVector<DeferredStore> deferredStores;
-
-  // Save the current insertion point
-  auto oldIP = builder.saveIP();
-
-  // Set insertion point after the allocations
-  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+  llvm::DataLayout DL = builder.GetInsertBlock()->getModule()->getDataLayout();
 
   // Define the kmp_taskred_input_t structure
   llvm::StructType *kmp_taskred_input_t =
       llvm::StructType::create(Context, "kmp_taskred_input_t");
-  llvm::Type *OpaquePtrTy = llvm::PointerType::get(Context, 0); // void*
-  llvm::Type *SizeTy = builder.getInt64Ty(); // size_t (assumed to be i64)
+  llvm::Type *OpaquePtrTy =
+      llvm::PointerType::get(Context,
+                             DL.getProgramAddressSpace()); // void*
+  llvm::Type *SizeTy = DL.getIntPtrType(Context);          // size_t
   llvm::Type *FlagsTy = llvm::Type::getInt32Ty(Context); // flags (i32)
 
   // Structure members
@@ -2621,13 +2628,18 @@ static LogicalResult allocAndInitializeTaskReductionVars(
   llvm::ArrayType *ArrayTy =
       llvm::ArrayType::get(kmp_taskred_input_t, arraySize);
 
+  // Save the current insertion point
+  auto oldIP = builder.saveIP();
+
+  // Set insertion point after the allocations
+  builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+
   // Allocate the array for kmp_taskred_input_t
   llvm::AllocaInst *ArrayAlloca =
       builder.CreateAlloca(ArrayTy, nullptr, "kmp_taskred_array");
 
   // Restore the insertion point
   builder.restoreIP(oldIP);
-  llvm::DataLayout DL = builder.GetInsertBlock()->getModule()->getDataLayout();
 
   for (int cnt = 0; cnt < arraySize; ++cnt) {
     llvm::Value *shared =
@@ -2660,8 +2672,8 @@ static LogicalResult allocAndInitializeTaskReductionVars(
     llvm::Value *FieldPtrReduceInit = builder.CreateStructGEP(
         kmp_taskred_input_t, StructPtr, 3, "reduce_init");
     llvm::Value *initFunction = createTaskReductionFunction(
-        builder, "red_init", redTy, moduleTranslation, reductionDecls,
-        reductionDecls[cnt].getInitializerRegion(), op, cnt,
+        op, builder, "red_init", redTy, moduleTranslation, reductionDecls[cnt],
+        reductionDecls[cnt].getInitializerRegion(), cnt,
         privateReductionVariables, reductionVariableMap);
     builder.CreateStore(initFunction, FieldPtrReduceInit);
 
@@ -2669,23 +2681,23 @@ static LogicalResult allocAndInitializeTaskReductionVars(
     llvm::Value *FieldPtrReduceFini = builder.CreateStructGEP(
         kmp_taskred_input_t, StructPtr, 4, "reduce_fini");
     llvm::Value *finiFunction = createTaskReductionFunction(
-        builder, "red_fini", redTy, moduleTranslation, reductionDecls,
-        reductionDecls[cnt].getCleanupRegion(), op, cnt,
-        privateReductionVariables, reductionVariableMap);
+        op, builder, "red_fini", redTy, moduleTranslation, reductionDecls[cnt],
+        reductionDecls[cnt].getCleanupRegion(), cnt, privateReductionVariables,
+        reductionVariableMap);
     builder.CreateStore(finiFunction, FieldPtrReduceFini);
 
     llvm::Value *FieldPtrReduceComb = builder.CreateStructGEP(
         kmp_taskred_input_t, StructPtr, 5, "reduce_comb");
     llvm::Value *combFunction = createTaskReductionFunction(
-        builder, "red_comb", redTy, moduleTranslation, reductionDecls,
-        reductionDecls[cnt].getReductionRegion(), op, cnt,
+        op, builder, "red_comb", redTy, moduleTranslation, reductionDecls[cnt],
+        reductionDecls[cnt].getReductionRegion(), cnt,
         privateReductionVariables, reductionVariableMap);
     builder.CreateStore(combFunction, FieldPtrReduceComb);
 
     llvm::Value *FieldPtrFlags =
         builder.CreateStructGEP(kmp_taskred_input_t, StructPtr, 6, "flags");
     llvm::ConstantInt *flagVal =
-        llvm::ConstantInt::get(llvm::Type::getInt64Ty(Context), 0);
+        llvm::ConstantInt::get(llvm::Type::getInt32Ty(Context), 0);
     builder.CreateStore(flagVal, FieldPtrFlags);
   }
 
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index a4165837d9df7..dd0236b3cdb52 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3093,7 +3093,7 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
 // CHECK: %[[REDUCE_COMB:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 5
 // CHECK: store ptr @red_comb, ptr %[[REDUCE_COMB]], align 8
 // CHECK: %[[FLAGS:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 6
-// CHECK: store i64 0, ptr %[[FLAGS]], align 4
+// CHECK: store i32 0, ptr %[[FLAGS]], align 4
 // CHECK: %omp_global_thread_num1 = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK: %[[INIT:.*]] = call ptr @__kmpc_taskred_init(i32 %omp_global_thread_num1, i32 1, ptr %kmp_taskred_array)
 
@@ -3152,7 +3152,7 @@ llvm.func @_QPtaskred_integer_arg() {
 // CHECK: %[[REDUCE_COMB:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 5
 // CHECK: store ptr @red_comb, ptr %[[REDUCE_COMB]], align 8
 // CHECK: %[[FLAGS:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 6
-// CHECK: store i64 0, ptr %[[FLAGS]], align 4
+// CHECK: store i32 0, ptr %[[FLAGS]], align 4
 // CHECK: %omp_global_thread_num1 = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK: %[[INIT:.*]] = call ptr @__kmpc_taskred_init(i32 %omp_global_thread_num1, i32 1, ptr %kmp_taskred_array)
 
@@ -3214,7 +3214,7 @@ llvm.func @_QPfloat_arg() {
 // CHECK: %[[REDUCE_COMB:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 5
 // CHECK: store ptr @red_comb, ptr %[[REDUCE_COMB]], align 8
 // CHECK: %[[FLAGS:.*]] = getelementptr inbounds nuw %kmp_taskred_input_t, ptr %[[RED_ELEMENT]], i32 0, i32 6
-// CHECK: store i64 0, ptr %[[FLAGS]], align 4
+// CHECK: store i32 0, ptr %[[FLAGS]], align 4
 // CHECK: %omp_global_thread_num1 = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK: %[[INIT:.*]] = call ptr @__kmpc_taskred_init(i32 %omp_global_thread_num1, i32 1, ptr %kmp_taskred_array)
 

>From ddb8c9f79e77d97b38611f8cd97cb5fddb885ed6 Mon Sep 17 00:00:00 2001
From: NimishMishra <neelam.nimish at gmail.com>
Date: Fri, 2 Jan 2026 18:45:06 +0530
Subject: [PATCH 7/7] Fix format

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f76cb433bf20c..08a6695166eb8 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2610,7 +2610,7 @@ static LogicalResult allocAndInitializeTaskReductionVars(
       llvm::PointerType::get(Context,
                              DL.getProgramAddressSpace()); // void*
   llvm::Type *SizeTy = DL.getIntPtrType(Context);          // size_t
-  llvm::Type *FlagsTy = llvm::Type::getInt32Ty(Context); // flags (i32)
+  llvm::Type *FlagsTy = llvm::Type::getInt32Ty(Context);   // flags (i32)
 
   // Structure members
   std::vector<llvm::Type *> structMembers = {



More information about the Mlir-commits mailing list