[llvm] [mlir] [mlir][OpenMP] Fix taskloop outlined step handling (PR #190198)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 2 08:41:10 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-openmp

Author: Tom Eccles (tblah)

<details>
<summary>Changes</summary>

The outlined taskloop preheader still used the original function's casted step value when computing the canonical loop trip count. When lb/ub/step were defined outside the taskloop body, the outlined function ended up referring to an instruction from another function, which crashed LLVM IR verification and finalization.

Reload the task step from the outlined task shareds, alongside lb and ub, and use that value for the trip-count division. Update the MLIR taskloop checks and add a regression for outer-scope variable bounds.

Fortran reproducer:
```
subroutine test(lb, ub, step)
  integer :: i, lb, ub, step

  !$omp taskloop
    do i=lb,ub,step
      call do_something(i)
    enddo
  !$omp end taskloop
end subroutine
```

Assisted-by: codex

---
Full diff: https://github.com/llvm/llvm-project/pull/190198.diff


7 Files Affected:

- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+11-2) 
- (modified) mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir (+3-3) 
- (modified) mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir (+2-2) 
- (modified) mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir (+18-6) 
- (modified) mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir (+1-1) 
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir (+41) 
- (modified) mlir/test/Target/LLVMIR/openmp-taskloop.mlir (+1-2) 


``````````diff
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index de6624a9e6063..d06ebbaca9f08 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2322,8 +2322,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
     // bounds.
     Value *TaskLB = nullptr;
     Value *TaskUB = nullptr;
+    Value *TaskStep = nullptr;
     Value *LoadTaskLB = nullptr;
     Value *LoadTaskUB = nullptr;
+    Value *LoadTaskStep = nullptr;
     for (Instruction &I : *TaskloopAllocaBB) {
       if (I.getOpcode() == Instruction::GetElementPtr) {
         GetElementPtrInst &Gep = cast<GetElementPtrInst>(I);
@@ -2335,6 +2337,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
           case 1:
             TaskUB = &I;
             break;
+          case 2:
+            TaskStep = &I;
+            break;
           }
         }
       } else if (I.getOpcode() == Instruction::Load) {
@@ -2345,6 +2350,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
         } else if (Load.getPointerOperand() == TaskUB) {
           assert(TaskUB != nullptr && "Expected value for TaskUB");
           LoadTaskUB = &I;
+        } else if (Load.getPointerOperand() == TaskStep) {
+          assert(TaskStep != nullptr && "Expected value for TaskStep");
+          LoadTaskStep = &I;
         }
       }
     }
@@ -2353,8 +2361,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
 
     assert(LoadTaskLB != nullptr && "Expected value for LoadTaskLB");
     assert(LoadTaskUB != nullptr && "Expected value for LoadTaskUB");
-    Value *TripCountMinusOne =
-        Builder.CreateSDiv(Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
+    assert(LoadTaskStep != nullptr && "Expected value for LoadTaskStep");
+    Value *TripCountMinusOne = Builder.CreateSDiv(
+        Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
     Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
     Value *CastedTripCount = Builder.CreateIntCast(TripCount, IVTy, true);
     Value *CastedTaskLB = Builder.CreateIntCast(LoadTaskLB, IVTy, true);
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
index 7aa6398a7f113..983cf943ea245 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
@@ -94,7 +94,7 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_28]], %[[VAL_26]]
-// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], 1
+// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_30]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
 // CHECK:         %[[VAL_42:.*]] = trunc i64 %[[VAL_41]] to i32
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_26]] to i32
@@ -228,7 +228,7 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:         br label %[[VAL_108:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107]]
 // CHECK:         %[[VAL_109:.*]] = sub i64 %[[VAL_98]], %[[VAL_96]]
-// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], 1
+// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], %[[VAL_100]]
 // CHECK:         %[[VAL_111:.*]] = add i64 %[[VAL_110]], 1
 // CHECK:         %[[VAL_112:.*]] = trunc i64 %[[VAL_111]] to i32
 // CHECK:         %[[VAL_113:.*]] = trunc i64 %[[VAL_96]] to i32
@@ -353,7 +353,7 @@ llvm.func @_QPtest3(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:         br label %[[VAL_174:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_173]]
 // CHECK:         %[[VAL_175:.*]] = sub i64 %[[VAL_164]], %[[VAL_162]]
-// CHECK:         %[[VAL_176:.*]] = sdiv i64 %[[VAL_175]], 1
+// CHECK:         %[[VAL_176:.*]] = sdiv i64 %[[VAL_175]], %[[VAL_166]]
 // CHECK:         %[[VAL_177:.*]] = add i64 %[[VAL_176]], 1
 // CHECK:         %[[VAL_178:.*]] = trunc i64 %[[VAL_177]] to i32
 // CHECK:         %[[VAL_179:.*]] = trunc i64 %[[VAL_162]] to i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
index 5d2dd9472421d..feeca4a272741 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
@@ -94,7 +94,7 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_28]], %[[VAL_26]]
-// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], 1
+// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_30]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
 // CHECK:         %[[VAL_42:.*]] = trunc i64 %[[VAL_41]] to i32
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_26]] to i32
@@ -228,7 +228,7 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:         br label %[[VAL_108:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107]]
 // CHECK:         %[[VAL_109:.*]] = sub i64 %[[VAL_98]], %[[VAL_96]]
-// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], 1
+// CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], %[[VAL_100]]
 // CHECK:         %[[VAL_111:.*]] = add i64 %[[VAL_110]], 1
 // CHECK:         %[[VAL_112:.*]] = trunc i64 %[[VAL_111]] to i32
 // CHECK:         %[[VAL_113:.*]] = trunc i64 %[[VAL_96]] to i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
index f0abff7e38869..f63b6691d165a 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
@@ -42,9 +42,11 @@ llvm.func @_QPtest() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %gep_ub.val, align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_6:.*]] = trunc i64 %[[task_lb]] to i32
 
@@ -92,9 +94,11 @@ llvm.func @_QPtest2() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %gep_ub.val, align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_6:.*]] = trunc i64 %[[task_lb]] to i32
 
@@ -147,9 +151,11 @@ llvm.func @_QPtest3() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32
@@ -202,9 +208,11 @@ llvm.func @_QPtest4() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32
@@ -259,9 +267,11 @@ llvm.func @_QPtest5() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32
@@ -312,9 +322,11 @@ llvm.func @_QPtest6() {
 // CHECK: %[[task_lb:.*]] = load i64, ptr %[[gep_task_lb]], align 4
 // CHECK: %[[gep_task_ub:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 1
 // CHECK: %[[task_ub:.*]] = load i64, ptr %[[gep_task_ub]], align 4
+// CHECK: %[[gep_task_step:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_1]], i32 0, i32 2
+// CHECK: %[[task_step:.*]] = load i64, ptr %[[gep_task_step]], align 4
 
 // CHECK: %[[VAL_3:.*]] = sub i64 %[[task_ub]], %[[task_lb]]
-// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], 1
+// CHECK: %[[VAL_4:.*]] = sdiv i64 %[[VAL_3]], %[[task_step]]
 // CHECK: %[[trip_cnt:.*]] = add i64 %[[VAL_4]], 1
 // CHECK: %[[VAL_5:.*]] = trunc i64 %[[trip_cnt]] to i32
 // CHECK: %6 = trunc i64 %[[task_lb]] to i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index a7e9eab874c9f..4976b442c9852 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -78,7 +78,7 @@ llvm.func @_QPtest() {
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_27]], %[[VAL_25]]
-// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], 1
+// CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_29]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
 // CHECK:         %[[VAL_42:.*]] = trunc i64 %[[VAL_41]] to i32
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_25]] to i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
new file mode 100644
index 0000000000000..b36be76e4cc48
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Regression test: taskloop loop bounds defined outside omp.taskloop must not
+// leave the outlined loop body using the original function's casted step
+// value. The outlined loop preheader has to reload the task's step from the
+// task shareds structure.
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+llvm.func @_QPtest() {
+  %c1_i64 = llvm.mlir.constant(1 : i64) : i64
+  %i = llvm.alloca %c1_i64 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %lb.addr = llvm.alloca %c1_i64 x i32 {bindc_name = "lb"} : (i64) -> !llvm.ptr
+  %ub.addr = llvm.alloca %c1_i64 x i32 {bindc_name = "ub"} : (i64) -> !llvm.ptr
+  %step.addr = llvm.alloca %c1_i64 x i32 {bindc_name = "step"} : (i64) -> !llvm.ptr
+  %c1_i32 = llvm.mlir.constant(1 : i32) : i32
+  %c10_i32 = llvm.mlir.constant(10 : i32) : i32
+  %c2_i32 = llvm.mlir.constant(2 : i32) : i32
+  llvm.store %c1_i32, %lb.addr : i32, !llvm.ptr
+  llvm.store %c10_i32, %ub.addr : i32, !llvm.ptr
+  llvm.store %c2_i32, %step.addr : i32, !llvm.ptr
+  %lb = llvm.load %lb.addr : !llvm.ptr -> i32
+  %ub = llvm.load %ub.addr : !llvm.ptr -> i32
+  %step = llvm.load %step.addr : !llvm.ptr -> i32
+  omp.taskloop private(@_QFtestEi_private_i32 %i -> %arg0 : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+      llvm.store %iv, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @_QPtest() {
+// CHECK: call void @__kmpc_taskloop(
+
+// CHECK-LABEL: define internal void @_QPtest..omp_par(
+// CHECK: %[[GEPSTEP:.*]] = getelementptr {{.*}}, ptr %{{.*}}, i32 0, i32 2
+// CHECK: %[[LOADSTEP:.*]] = load i64, ptr %[[GEPSTEP]]
+// CHECK: %[[SUB:.*]] = sub i64 %{{.*}}, %{{.*}}
+// CHECK: %[[DIV:.*]] = sdiv i64 %[[SUB]], %[[LOADSTEP]]
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
index 5f31c547e7485..d8f644de657fc 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
@@ -96,7 +96,7 @@ llvm.func @_QPtest() {
 // CHECK:         br label %[[VAL_39:.*]]
 // CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_38]]
 // CHECK:         %[[VAL_40:.*]] = sub i64 %[[VAL_29]], %[[VAL_27]]
-// CHECK:         %[[VAL_41:.*]] = sdiv i64 %[[VAL_40]], 1
+// CHECK:         %[[VAL_41:.*]] = sdiv i64 %[[VAL_40]], %[[VAL_31]]
 // CHECK:         %[[VAL_42:.*]] = add i64 %[[VAL_41]], 1
 // CHECK:         %[[VAL_43:.*]] = trunc i64 %[[VAL_42]] to i32
 // CHECK:         %[[VAL_44:.*]] = trunc i64 %[[VAL_27]] to i32
@@ -148,4 +148,3 @@ llvm.func @_QPtest() {
 // CHECK:         %[[VAL_75:.*]] = load i32, ptr %[[VAL_71]], align 4
 // CHECK:         store i32 %[[VAL_75]], ptr %[[VAL_72]], align 4
 // CHECK:         ret void
-

``````````

</details>


https://github.com/llvm/llvm-project/pull/190198


More information about the llvm-commits mailing list