[flang-commits] [flang] [flang][OpenMP] Implement collapse for imperfectly nested loops (PR #202435)
Tom Eccles via flang-commits
flang-commits at lists.llvm.org
Tue Jun 23 09:28:49 PDT 2026
================
@@ -0,0 +1,604 @@
+! Test lowering of imperfectly nested collapse loops (CLN relaxation).
+! Intervening code is guarded by IV comparisons to restore correct
+! execution frequency and ordering within the flat omp.loop_nest body.
+
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPcollapse2_imperfect
+! Test input: a two-deep loop nest under `!$omp do collapse(2)` that is not
+! perfectly nested, because one statement sits between the `do i` and `do j`
+! headers.
+!   n - upper bound of both loops (read only)
+!   x - accumulator updated by the intervening statement and the inner body
+subroutine collapse2_imperfect(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ ! Intervening code: in the source nest this runs once per i iteration,
+ ! before the inner loop starts.
+ x = x + 1
+ do j = 1, n
+ ! Innermost body: runs for every (i, j) pair.
+ x = x + j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{.*}}, %[[LB_J:.*]]) to
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! Guard: j == lower_bound (before code executes once per i)
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[LB_J]] : i32
+! CHECK: fir.if %[[CMP]] {
+! Intervening code: x = x + 1
+! CHECK: %[[X1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[C1:.*]] = arith.constant 1 : i32
+! CHECK: %[[ADD1:.*]] = arith.addi %[[X1]], %[[C1]] : i32
+! CHECK: hlfir.assign %[[ADD1]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Innermost body: x = x + j
+! CHECK: %[[X2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[JVAL:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADD2:.*]] = arith.addi %[[X2]], %[[JVAL]] : i32
+! CHECK: hlfir.assign %[[ADD2]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: omp.yield
+
+! CHECK-LABEL: func.func @_QPcollapse3_imperfect
+! Test input: a three-deep loop nest under `!$omp do collapse(3)` with
+! intervening statements at two levels, one before the `do j` loop and one
+! before the `do k` loop.
+!   n - upper bound of all three loops (read only)
+!   x - accumulator updated at every nesting level
+subroutine collapse3_imperfect(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j, k
+
+ !$omp do collapse(3)
+ do i = 1, n
+ ! Intervening code at the outermost level: once per i in the source nest.
+ x = x + i
+ do j = 1, n
+ ! Intervening code at the middle level: once per (i, j) in the source nest.
+ x = x + j
+ do k = 1, n
+ ! Innermost body: runs for every (i, j, k) triple.
+ x = x + k
+ end do
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I3:.*]], %[[J3:.*]], %[[K3:.*]]) : i32 =
+! CHECK-SAME: (%{{.*}}, %[[LB_J3:.*]], %[[LB_K3:.*]]) to
+! CHECK: hlfir.assign %[[I3]]
+! CHECK: hlfir.assign %[[J3]]
+! CHECK: hlfir.assign %[[K3]]
+! Guard: j == lb_j AND k == lb_k (level 0 before code, once per i)
+! CHECK: %[[CMP_J:.*]] = arith.cmpi eq, %[[J3]], %[[LB_J3]] : i32
+! CHECK: %[[CMP_K1:.*]] = arith.cmpi eq, %[[K3]], %[[LB_K3]] : i32
+! CHECK: %[[AND1:.*]] = arith.andi %[[CMP_J]], %[[CMP_K1]] : i1
+! CHECK: fir.if %[[AND1]] {
+! Intervening code at level 0: x = x + i
+! CHECK: %[[XI:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[IVAL:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDI:.*]] = arith.addi %[[XI]], %[[IVAL]] : i32
+! CHECK: hlfir.assign %[[ADDI]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Guard: k == lb_k (level 1 before code, once per (i,j))
+! CHECK: %[[CMP_K2:.*]] = arith.cmpi eq, %[[K3]], %[[LB_K3]] : i32
+! CHECK: fir.if %[[CMP_K2]] {
+! Intervening code at level 1: x = x + j
+! CHECK: %[[XJ:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[JVAL3:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDJ:.*]] = arith.addi %[[XJ]], %[[JVAL3]] : i32
+! CHECK: hlfir.assign %[[ADDJ]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Innermost body: x = x + k
+! CHECK: %[[XK:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[KVAL:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDK:.*]] = arith.addi %[[XK]], %[[KVAL]] : i32
+! CHECK: hlfir.assign %[[ADDK]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: omp.yield
+
+! CHECK-LABEL: func.func @_QPcollapse2_both_sides
+! Test input: a two-deep loop nest under `!$omp simd collapse(2)` with
+! intervening code on both sides of the inner loop, an assignment before it
+! and a procedure call after it.
+!   n - upper bound of both loops (read only)
+!   x - accumulator; also passed to ext_sub after the inner loop
+subroutine collapse2_both_sides(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp simd collapse(2)
+ do i = 1, n
+ ! Intervening code before the inner loop: once per i in the source nest.
+ x = x + 1
+ do j = 1, n
+ ! Innermost body: runs for every (i, j) pair.
+ x = x + j
+ end do
+ ! Intervening code after the inner loop: once per i, after all j iterations.
+ ! NOTE(review): ext_sub is not defined in the visible part of this file;
+ ! presumably an external procedure with an implicit interface (confirm).
+ call ext_sub(x)
+ end do
+ !$omp end simd
+end subroutine
+
+! CHECK: omp.simd
+! CHECK-NEXT: omp.loop_nest (%[[I4:.*]], %[[J4:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J4:[^)]*]]) to (%{{[^)]*}}, %[[UB_J4:[^)]*]])
+! CHECK: hlfir.assign %[[I4]]
+! CHECK: hlfir.assign %[[J4]]
+! Guard: j == lower_bound (before code)
+! CHECK: %[[CMP_B:.*]] = arith.cmpi eq, %[[J4]], %[[LB_J4]] : i32
+! CHECK: fir.if %[[CMP_B]] {
+! Intervening code before inner loop: x = x + 1
+! CHECK: %[[XB:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[CB:.*]] = arith.constant 1 : i32
+! CHECK: %[[ADDB:.*]] = arith.addi %[[XB]], %[[CB]] : i32
+! CHECK: hlfir.assign %[[ADDB]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Innermost body: x = x + j
+! CHECK: %[[XIN:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[JIN:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDIN:.*]] = arith.addi %[[XIN]], %[[JIN]] : i32
+! CHECK: hlfir.assign %[[ADDIN]] to %{{.*}} : i32, !fir.ref<i32>
+! Guard: j == upper_bound (after code)
+! CHECK: %[[CMP_A:.*]] = arith.cmpi eq, %[[J4]], %[[UB_J4]] : i32
+! CHECK: fir.if %[[CMP_A]] {
+! Intervening code after inner loop: call ext_sub(x)
+! CHECK: fir.call @_QPext_sub
----------------
tblah wrote:
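At this point I think we might have a bug if `ext_sub` used `j`: the "after" guard runs this code while `j` still equals the inner loop's upper bound (`n`), but in the original nest the call comes after the inner loop has finished. Shouldn't the value of `j` be `n+1` by then (I'm not certain about this)?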
https://github.com/llvm/llvm-project/pull/202435
More information about the flang-commits
mailing list