[Mlir-commits] [mlir] 07a40b3 - [mlir][OpenMP] Fix taskloop outlined step handling (#190198)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Tue Apr 7 08:51:27 PDT 2026


Author: Tom Eccles
Date: 2026-04-07T15:51:03Z
New Revision: 07a40b33e513c85061a4204b579a8cd8a60e6507

URL: https://github.com/llvm/llvm-project/commit/07a40b33e513c85061a4204b579a8cd8a60e6507
DIFF: https://github.com/llvm/llvm-project/commit/07a40b33e513c85061a4204b579a8cd8a60e6507.diff

LOG: [mlir][OpenMP] Fix taskloop outlined step handling (#190198)

The outlined taskloop preheader still used the original function's
casted step value when computing the canonical loop trip count. When
lb/ub/step were defined outside the taskloop body, the outlined function
ended up referring to an instruction from another function, which
crashed LLVM IR verification and finalization.

Reload the task step from the outlined task shareds, alongside lb and
ub, and use that value for the trip-count division. Update the MLIR
taskloop checks and add a regression for outer-scope variable bounds.

Fortran reproducer:
```
subroutine test(lb, ub, step)
  integer :: i, lb, ub, step

  !$omp taskloop
    do i=lb,ub,step
      call do_something(i)
    enddo
  !$omp end taskloop
end subroutine
```

Assisted-by: codex

Added: 
    mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir

Modified: 
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop.mlir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index de6624a9e6063..d06ebbaca9f08 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2322,8 +2322,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
     // bounds.
     Value *TaskLB = nullptr;
     Value *TaskUB = nullptr;
+    Value *TaskStep = nullptr;
     Value *LoadTaskLB = nullptr;
     Value *LoadTaskUB = nullptr;
+    Value *LoadTaskStep = nullptr;
     for (Instruction &I : *TaskloopAllocaBB) {
       if (I.getOpcode() == Instruction::GetElementPtr) {
         GetElementPtrInst &Gep = cast<GetElementPtrInst>(I);
@@ -2335,6 +2337,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
           case 1:
             TaskUB = &I;
             break;
+          case 2:
+            TaskStep = &I;
+            break;
           }
         }
       } else if (I.getOpcode() == Instruction::Load) {
@@ -2345,6 +2350,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
         } else if (Load.getPointerOperand() == TaskUB) {
           assert(TaskUB != nullptr && "Expected value for TaskUB");
           LoadTaskUB = &I;
+        } else if (Load.getPointerOperand() == TaskStep) {
+          assert(TaskStep != nullptr && "Expected value for TaskStep");
+          LoadTaskStep = &I;
         }
       }
     }
@@ -2353,8 +2361,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
 
     assert(LoadTaskLB != nullptr && "Expected value for LoadTaskLB");
     assert(LoadTaskUB != nullptr && "Expected value for LoadTaskUB");
-    Value *TripCountMinusOne =
-        Builder.CreateSDiv(Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
+    assert(LoadTaskStep != nullptr && "Expected value for LoadTaskStep");
+    Value *TripCountMinusOne = Builder.CreateSDiv(
+        Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
     Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
     Value *CastedTripCount = Builder.CreateIntCast(TripCount, IVTy, true);
     Value *CastedTaskLB = Builder.CreateIntCast(LoadTaskLB, IVTy, true);

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
index 7aa6398a7f113..983cf943ea245 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
@@ -94,7 +94,7 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_28]], %[[VAL_26]]
-// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], 1
+// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_30]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
 // CHECK:         %[[VAL_42:.*]] = trunc i64 %[[VAL_41]] to i32
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_26]] to i32
@@ -228,7 +228,7 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:         br label %[[VAL_108:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107]]
 // CHECK:         %[[VAL_109:.*]] = sub i64 %[[VAL_98]], %[[VAL_96]]
-// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], 1
+// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], %[[VAL_100]]
 // CHECK:         %[[VAL_111:.*]] = add i64 %[[VAL_110]], 1
 // CHECK:         %[[VAL_112:.*]] = trunc i64 %[[VAL_111]] to i32
 // CHECK:         %[[VAL_113:.*]] = trunc i64 %[[VAL_96]] to i32
@@ -353,7 +353,7 @@ llvm.func @_QPtest3(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:         br label %[[VAL_174:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_173]]
 // CHECK:         %[[VAL_175:.*]] = sub i64 %[[VAL_164]], %[[VAL_162]]
-// CHECK:         %[[VAL_176:.*]] = sdiv i64 %[[VAL_175]], 1
+// CHECK:         %[[VAL_176:.*]] = sdiv i64 %[[VAL_175]], %[[VAL_166]]
 // CHECK:         %[[VAL_177:.*]] = add i64 %[[VAL_176]], 1
 // CHECK:         %[[VAL_178:.*]] = trunc i64 %[[VAL_177]] to i32
 // CHECK:         %[[VAL_179:.*]] = trunc i64 %[[VAL_162]] to i32

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
index 5d2dd9472421d..feeca4a272741 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
@@ -94,7 +94,7 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_28]], %[[VAL_26]]
-// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], 1
+// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_30]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
 // CHECK:         %[[VAL_42:.*]] = trunc i64 %[[VAL_41]] to i32
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_26]] to i32
@@ -228,7 +228,7 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:         br label %[[VAL_108:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107]]
 // CHECK:         %[[VAL_109:.*]] = sub i64 %[[VAL_98]], %[[VAL_96]]
-// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], 1
+// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], %[[VAL_100]]
 // CHECK:         %[[VAL_111:.*]] = add i64 %[[VAL_110]], 1
 // CHECK:         %[[VAL_112:.*]] = trunc i64 %[[VAL_111]] to i32
 // CHECK:         %[[VAL_113:.*]] = trunc i64 %[[VAL_96]] to i32

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
index f0abff7e38869..f63b6691d165a 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
@@ -42,9 +42,11 @@ llvm.func @_QPtest() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %gep_ub.val, align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_6:.*]] = trunc i64 %[[task_lb]] to i32
 
@@ -92,9 +94,11 @@ llvm.func @_QPtest2() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %gep_ub.val, align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_6:.*]] = trunc i64 %[[task_lb]] to i32
 
@@ -147,9 +151,11 @@ llvm.func @_QPtest3() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32
@@ -202,9 +208,11 @@ llvm.func @_QPtest4() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32
@@ -259,9 +267,11 @@ llvm.func @_QPtest5() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32
@@ -312,9 +322,11 @@ llvm.func @_QPtest6() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index a7e9eab874c9f..4976b442c9852 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -78,7 +78,7 @@ llvm.func @_QPtest() {
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_27]], %[[VAL_25]]
-// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], 1
+// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_29]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
 // CHECK:         %[[VAL_42:.*]] = trunc i64 %[[VAL_41]] to i32
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_25]] to i32

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
new file mode 100644
index 0000000000000..16dc904788c97
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Regression test: taskloop loop bounds defined outside omp.taskloop must not
+// leave the outlined loop body using the original function's casted step
+// value. The outlined loop preheader has to reload the task's step from the
+// task shared values structure.
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+llvm.func @_QPtest() {
+  %c1_i64 = llvm.mlir.constant(1 : i64) : i64
+  %i = llvm.alloca %c1_i64 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %lb.addr = llvm.alloca %c1_i64 x i32 {bindc_name = "lb"} : (i64) -> !llvm.ptr
+  %ub.addr = llvm.alloca %c1_i64 x i32 {bindc_name = "ub"} : (i64) -> !llvm.ptr
+  %step.addr = llvm.alloca %c1_i64 x i32 {bindc_name = "step"} : (i64) -> !llvm.ptr
+  %c1_i32 = llvm.mlir.constant(1 : i32) : i32
+  %c10_i32 = llvm.mlir.constant(10 : i32) : i32
+  %c2_i32 = llvm.mlir.constant(2 : i32) : i32
+  llvm.store %c1_i32, %lb.addr : i32, !llvm.ptr
+  llvm.store %c10_i32, %ub.addr : i32, !llvm.ptr
+  llvm.store %c2_i32, %step.addr : i32, !llvm.ptr
+  %lb = llvm.load %lb.addr : !llvm.ptr -> i32
+  %ub = llvm.load %ub.addr : !llvm.ptr -> i32
+  %step = llvm.load %step.addr : !llvm.ptr -> i32
+  omp.taskloop private(@_QFtestEi_private_i32 %i -> %arg0 : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+      llvm.store %iv, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @_QPtest() {
+// CHECK: call void @__kmpc_taskloop(
+
+// CHECK-LABEL: define internal void @_QPtest..omp_par(
+// CHECK: %[[GEPSTEP:.*]] = getelementptr {{.*}}, ptr %{{.*}}, i32 0, i32 2
+// CHECK: %[[LOADSTEP:.*]] = load i64, ptr %[[GEPSTEP]]
+// CHECK: %[[SUB:.*]] = sub i64 %{{.*}}, %{{.*}}
+// CHECK: %[[DIV:.*]] = sdiv i64 %[[SUB]], %[[LOADSTEP]]

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
index 5f31c547e7485..d8f644de657fc 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
@@ -96,7 +96,7 @@ llvm.func @_QPtest() {
 // CHECK:         br label %[[VAL_39:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_38]]
 // CHECK:         %[[VAL_40:.*]] = sub i64 %[[VAL_29]], %[[VAL_27]]
-// CHECK:         %[[VAL_41:.*]] = sdiv i64 %[[VAL_40]], 1
+// CHECK:         %[[VAL_41:.*]] = sdiv i64 %[[VAL_40]], %[[VAL_31]]
 // CHECK:         %[[VAL_42:.*]] = add i64 %[[VAL_41]], 1
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_42]] to i32
 // CHECK:         %[[VAL_44:.*]] = trunc i64 %[[VAL_27]] to i32
@@ -148,4 +148,3 @@ llvm.func @_QPtest() {
 // CHECK:         %[[VAL_75:.*]] = load i32, ptr %[[VAL_71]], align 4
 // CHECK:         store i32 %[[VAL_75]], ptr %[[VAL_72]], align 4
 // CHECK:         ret void
-


        


More information about the Mlir-commits mailing list