[flang-commits] [flang] [DRAFT][flang][PFT] canonicalise multi-stmt if/cycle into structured if/else (PR #196386)

Eugene Epshteyn via flang-commits flang-commits at lists.llvm.org
Sat May 9 18:49:57 PDT 2026


https://github.com/eugeneepshteyn updated https://github.com/llvm/llvm-project/pull/196386

>From efdc71d01a777ac3be8ba819c5bb862cc89bea37 Mon Sep 17 00:00:00 2001
From: Eugene Epshteyn <eepshteyn at nvidia.com>
Date: Thu, 7 May 2026 14:27:13 -0400
Subject: [PATCH 1/2] [flang][PFT] canonicalise multi-stmt if/cycle into
 structured if/else

rewriteIfGotos already canonicalises the special case
`if (cond) cycle` into a structured form before branch analysis. It
does not handle the more general pattern

  do j = 1, n
    do i = 1, n
      if (i == jdiag) then
        a(i,j) = 0.0d0
        cycle
      end if
      a(i,j) = real(i + j, 8)
    end do
  end do

Without canonicalisation the cycle marks the loop as unstructured, which
(for an enclosing `acc parallel loop`) defeats GPU parallelisation.

Extend rewriteIfGotos to also accept IF bodies of size >= 3 whose last
body statement is a CYCLE or GOTO and whose construct has no existing
ELSE / ELSE IF chain.

After the rewrite the IfConstruct is a plain structured if/else with no
branches out of the loop, so PFT branch analysis no longer marks the
enclosing DO as unstructured.

Assisted-by: AI
---
 flang/lib/Lower/PFTBuilder.cpp                | 86 +++++++++++++----
 .../OpenACC/acc-loop-cycle-canonicalize.f90   | 92 +++++++++++++++++++
 flang/test/Lower/ifconvert.f90                | 59 ++++++++++++
 3 files changed, 220 insertions(+), 17 deletions(-)
 create mode 100644 flang/test/Lower/OpenACC/acc-loop-cycle-canonicalize.f90

diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp
index afabb6b29f4f1..a1286b928df31 100644
--- a/flang/lib/Lower/PFTBuilder.cpp
+++ b/flang/lib/Lower/PFTBuilder.cpp
@@ -737,17 +737,51 @@ class PFTBuilder {
           if (successorIt != it) {
             Fortran::lower::pft::EvaluationList &ifBodyList =
                 *ifConstructIt->evaluationList;
+            // The trailing GOTO/CYCLE is the last body statement, just before
+            // EndIfStmt. For the legacy single-body-statement case (3-eval
+            // IfConstruct) it is also `next(begin)`.
+            lower::pft::EvaluationList::iterator endIfStmtIt =
+                std::prev(ifBodyList.end());
             lower::pft::EvaluationList::iterator branchStmtIt =
-                std::next(ifBodyList.begin());
+                std::prev(endIfStmtIt);
             assert((branchStmtIt->isA<parser::GotoStmt>() ||
                     branchStmtIt->isA<parser::CycleStmt>()) &&
                    "expected goto or cycle statement");
+            // Capture the body statement that lexically precedes the trailing
+            // CYCLE/GOTO; for the legacy 3-eval case this is the IfStmt
+            // itself.
+            lower::pft::Evaluation *lastBodyEval = &*std::prev(branchStmtIt);
+            bool wasThreeEval = branchStmtIt == std::next(ifBodyList.begin());
             ifBodyList.erase(branchStmtIt);
-            lower::pft::Evaluation &ifStmt = *ifBodyList.begin();
-            ifStmt.negateCondition = true;
-            ifStmt.lexicalSuccessor = firstStmt(&*successorIt);
-            lower::pft::EvaluationList::iterator endIfStmtIt =
-                std::prev(ifBodyList.end());
+            endIfStmtIt = std::prev(ifBodyList.end());
+            if (wasThreeEval) {
+              // Legacy rewrite for `if (cond) then; CYCLE/GOTO; end if`:
+              // negate the condition, drop the IF, and put the after-stmts
+              // where the IF used to be. Saves one branch versus introducing
+              // an empty THEN with a synthetic ELSE for the same shape.
+              lower::pft::Evaluation &ifStmt = *ifBodyList.begin();
+              ifStmt.negateCondition = true;
+              ifStmt.lexicalSuccessor = firstStmt(&*successorIt);
+            } else {
+              // Multi-stmt body: keep the THEN body (minus the trailing
+              // CYCLE/GOTO) and synthesise an ELSE branch that holds the
+              // after-stmts. After this rewrite the IfConstruct is a plain
+              // structured if/else with no branches out of the loop, so PFT
+              // branch analysis no longer marks the enclosing DO as
+              // unstructured.
+              static const parser::ElseStmt syntheticElseStmt{
+                  std::optional<parser::Name>{}};
+              auto elseIt = ifBodyList.insert(
+                  endIfStmtIt,
+                  lower::pft::Evaluation{
+                      syntheticElseStmt,
+                      lower::pft::PftNode{*ifConstructIt},
+                      ifConstructIt->position,
+                      std::optional<parser::Label>{}});
+              elseIt->parentConstruct = &*ifConstructIt;
+              lastBodyEval->lexicalSuccessor = &*elseIt;
+              elseIt->lexicalSuccessor = firstStmt(&*successorIt);
+            }
             std::prev(it)->lexicalSuccessor = &*endIfStmtIt;
             endIfStmtIt->lexicalSuccessor = firstStmt(&*it);
             ifBodyList.splice(endIfStmtIt, evaluationList, successorIt, it);
@@ -757,17 +791,35 @@ class PFTBuilder {
           ifCandidateStack.pop_back();
         }
       }
-      if (eval.isA<parser::IfConstruct>() && eval.evaluationList->size() == 3) {
-        const auto bodyEval = std::next(eval.evaluationList->begin());
-        if (const auto *gotoStmt = bodyEval->getIf<parser::GotoStmt>()) {
-          if (!bodyEval->lexicalSuccessor->label)
-            ifCandidateStack.push_back({it, gotoStmt->v});
-        } else if (doStmt) {
-          if (const auto *cycleStmt = bodyEval->getIf<parser::CycleStmt>()) {
-            std::string cycleName = getConstructName(*cycleStmt);
-            if (cycleName.empty() || cycleName == doName)
-              // This candidate will match doStmt's EndDoStmt.
-              ifCandidateStack.push_back({it, {}, true});
+      if (eval.isA<parser::IfConstruct>() && eval.evaluationList->size() >= 3) {
+        // Find the last body statement (just before EndIfStmt). For an
+        // IfConstruct body of size >= 3 layout is
+        // `[IfThenStmt, body1, ..., bodyN, EndIfStmt]`; the candidate is
+        // viable only if `bodyN` is a GOTO/CYCLE that targets out of the
+        // enclosing loop and there is no ELSE / ELSE IF branch in between.
+        auto endIfIt = std::prev(eval.evaluationList->end());
+        auto lastBodyIt = std::prev(endIfIt);
+        bool hasElseChain = false;
+        for (auto bIt = std::next(eval.evaluationList->begin()); bIt != endIfIt;
+             ++bIt) {
+          if (bIt->isA<parser::ElseStmt>() ||
+              bIt->isA<parser::ElseIfStmt>()) {
+            hasElseChain = true;
+            break;
+          }
+        }
+        if (!hasElseChain) {
+          if (const auto *gotoStmt = lastBodyIt->getIf<parser::GotoStmt>()) {
+            if (!lastBodyIt->lexicalSuccessor->label)
+              ifCandidateStack.push_back({it, gotoStmt->v});
+          } else if (doStmt) {
+            if (const auto *cycleStmt =
+                    lastBodyIt->getIf<parser::CycleStmt>()) {
+              std::string cycleName = getConstructName(*cycleStmt);
+              if (cycleName.empty() || cycleName == doName)
+                // This candidate will match doStmt's EndDoStmt.
+                ifCandidateStack.push_back({it, {}, true});
+            }
           }
         }
       }
diff --git a/flang/test/Lower/OpenACC/acc-loop-cycle-canonicalize.f90 b/flang/test/Lower/OpenACC/acc-loop-cycle-canonicalize.f90
new file mode 100644
index 0000000000000..7da82cd0b3ecd
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-loop-cycle-canonicalize.f90
@@ -0,0 +1,92 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! Plan B (lorado-2856 follow-up): rewriteIfGotos canonicalises
+! `if (cond) then; <stmts>; cycle; end if; <after>` into a structured
+! `if (cond) then; <stmts>; else; <after>; end if`. This means an
+! `acc parallel loop` over such a body, with or without `collapse(N)`,
+! lowers to a STRUCTURED acc.loop (with `control(...)` and bounds on the
+! op, no `unstructured` attribute), so the GPU backend can parallelise it
+! the same way nvfortran does.
+
+! Reproducer from the lorado #2856 GitLab issue.
+subroutine acc_collapse_cycle_lorado(a)
+  integer :: i, j, jdiag
+  real(8) :: a(:,:)
+  jdiag = 4
+  !$acc parallel loop collapse(2) copy(a)
+  do j = 1, 8
+    do i = 1, 8
+      if (i == jdiag) then
+        a(i, j) = 0.0d0
+        cycle
+      end if
+      a(i, j) = real(i + j, 8)
+    end do
+  end do
+  !$acc end parallel loop
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_collapse_cycle_lorado
+! CHECK: acc.parallel combined(loop)
+! Both IVs are privatised:
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.private varPtr(%{{.*}} : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "i"}
+! Structured acc.loop: bounds are on the op (control(...) clause) for both IVs.
+! CHECK: acc.loop combined(parallel) private(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32) = (%{{.*}}, %{{.*}} : i32, i32) to (%{{.*}}, %{{.*}} : i32, i32) step (%{{.*}}, %{{.*}} : i32, i32)
+! Body uses fir.if/else for the canonicalised cycle (no cf.cond_br backedges).
+! CHECK: fir.if %{{.*}} {
+! CHECK:   hlfir.assign %{{cst.*}} to %{{.*}} : f64, !fir.ref<f64>
+! CHECK: } else {
+! CHECK:   hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref<f64>
+! CHECK: }
+! Trailing attributes carry collapse and inclusiveUpperbound but NOT unstructured:
+! CHECK: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true>, independent = [#acc.device_type<none>]}
+! CHECK-NOT: unstructured
+
+! Same shape, single-level (no collapse): exercises the rewrite for an
+! unadorned `acc parallel loop` body too.
+subroutine acc_loop_cycle_canonicalize(a)
+  integer :: i, jdiag
+  real :: a(:)
+  jdiag = 4
+  !$acc parallel loop
+  do i = 1, 8
+    if (i == jdiag) then
+      a(i) = 0.0
+      cycle
+    end if
+    a(i) = real(i)
+  end do
+  !$acc end parallel loop
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_loop_cycle_canonicalize
+! CHECK: acc.loop combined(parallel) private(%{{.*}} : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: fir.if
+! CHECK: } else {
+! CHECK: } attributes {{{.*}}independent = [#acc.device_type<none>]{{.*}}}
+! CHECK-NOT: unstructured
+
+! Negative test: an existing ELSE branch blocks the rewrite. The loop
+! stays unstructured (acc.loop with `unstructured` attribute, no `control`).
+subroutine acc_loop_cycle_existing_else_unchanged(a)
+  integer :: i, jdiag
+  real :: a(:)
+  jdiag = 4
+  !$acc parallel loop
+  do i = 1, 8
+    if (i == jdiag) then
+      a(i) = 0.0
+    else
+      a(i) = -1.0
+      cycle
+    end if
+    a(i) = real(i)
+  end do
+  !$acc end parallel loop
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_loop_cycle_existing_else_unchanged
+! No control(...) on acc.loop; trailing attributes contain `unstructured`.
+! CHECK: acc.loop combined(parallel) private(%{{.*}} : !fir.ref<i32>) {
+! CHECK: } attributes {{{.*}}unstructured}
diff --git a/flang/test/Lower/ifconvert.f90 b/flang/test/Lower/ifconvert.f90
index cbd1e79bd0114..6b135ee00ffb8 100644
--- a/flang/test/Lower/ifconvert.f90
+++ b/flang/test/Lower/ifconvert.f90
@@ -94,3 +94,62 @@
      print*, i
 4 end do
 end
+
+! Multi-statement IF body whose last stmt is a CYCLE: the rewrite turns
+! `if (cond) then; <stmts>; cycle; end if; <after>` into a structured
+! `if (cond) then; <stmts>; else; <after>; end if`. The IfStmt is NOT
+! negated (no `[negate]` annotation), and a synthetic ElseStmt appears
+! between the THEN body and the moved <after> stmts.
+subroutine block_if_cycle_multi_stmt(a)
+  integer :: i, jdiag
+  real :: a(:)
+  jdiag = 4
+  ! CHECK-LABEL: Subroutine block_if_cycle_multi_stmt
+  ! CHECK: <<DoConstruct>>
+  ! CHECK:   NonLabelDoStmt {{.*}}: do i = 1, 8
+  ! CHECK:   <<IfConstruct>>
+  ! CHECK:     {{[0-9]+}} ^IfThenStmt {{.*}}: if(i == jdiag) then
+  ! CHECK-NOT:                       [negate]
+  ! CHECK:     {{[0-9]+}} ^AssignmentStmt: a(i) = 0.0
+  ! CHECK:     ^ElseStmt
+  ! CHECK:     {{[0-9]+}} AssignmentStmt: a(i) = real(i)
+  ! CHECK:     {{[0-9]+}} EndIfStmt
+  ! CHECK:   <<End IfConstruct>>
+  ! CHECK:   {{[0-9]+}} EndDoStmt {{.*}}: end do
+  ! CHECK: <<End DoConstruct>>
+  do i = 1, 8
+    if (i == jdiag) then
+      a(i) = 0.0
+      cycle
+    end if
+    a(i) = real(i)
+  end do
+end subroutine
+
+! IF body whose last stmt is CYCLE but contains an existing ELSE branch:
+! the rewrite must NOT apply (we cannot safely splice the after-stmts past
+! the existing else). The loop stays unstructured.
+subroutine block_if_cycle_with_existing_else(a)
+  integer :: i, jdiag
+  real :: a(:)
+  jdiag = 4
+  ! CHECK-LABEL: Subroutine block_if_cycle_with_existing_else
+  ! CHECK: <<DoConstruct!>>
+  ! CHECK:   <<IfConstruct!>>
+  ! CHECK:     ^IfThenStmt {{.*}}: if(i == jdiag) then
+  ! CHECK:     ^AssignmentStmt: a(i) = 0.0
+  ! CHECK:     ^ElseStmt: else
+  ! CHECK:     AssignmentStmt: a(i) = -1.0
+  ! CHECK:     CycleStmt! {{.*}}: cycle
+  ! CHECK:     EndIfStmt: end if
+  ! CHECK:   <<End IfConstruct!>>
+  do i = 1, 8
+    if (i == jdiag) then
+      a(i) = 0.0
+    else
+      a(i) = -1.0
+      cycle
+    end if
+    a(i) = real(i)
+  end do
+end subroutine

>From 160aa8b6e55130b5da4c937d119b8168a34160ed Mon Sep 17 00:00:00 2001
From: Eugene Epshteyn <eepshteyn at nvidia.com>
Date: Thu, 7 May 2026 14:36:58 -0400
Subject: [PATCH 2/2] clang-format

---
 flang/lib/Lower/PFTBuilder.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp
index a1286b928df31..405842c3736c9 100644
--- a/flang/lib/Lower/PFTBuilder.cpp
+++ b/flang/lib/Lower/PFTBuilder.cpp
@@ -774,10 +774,8 @@ class PFTBuilder {
               auto elseIt = ifBodyList.insert(
                   endIfStmtIt,
                   lower::pft::Evaluation{
-                      syntheticElseStmt,
-                      lower::pft::PftNode{*ifConstructIt},
-                      ifConstructIt->position,
-                      std::optional<parser::Label>{}});
+                      syntheticElseStmt, lower::pft::PftNode{*ifConstructIt},
+                      ifConstructIt->position, std::optional<parser::Label>{}});
               elseIt->parentConstruct = &*ifConstructIt;
               lastBodyEval->lexicalSuccessor = &*elseIt;
               elseIt->lexicalSuccessor = firstStmt(&*successorIt);
@@ -802,8 +800,7 @@ class PFTBuilder {
         bool hasElseChain = false;
         for (auto bIt = std::next(eval.evaluationList->begin()); bIt != endIfIt;
              ++bIt) {
-          if (bIt->isA<parser::ElseStmt>() ||
-              bIt->isA<parser::ElseIfStmt>()) {
+          if (bIt->isA<parser::ElseStmt>() || bIt->isA<parser::ElseIfStmt>()) {
             hasElseChain = true;
             break;
           }



More information about the flang-commits mailing list