[flang-commits] [flang] [flang][acc] Implement cache directive lowering (PR #174897)
via flang-commits
flang-commits at lists.llvm.org
Thu Jan 8 12:49:53 PST 2026
https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/174897
>From 7ecc7bdf41a4968832c20759afd6a59eaff30186 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 7 Jan 2026 14:17:15 -0800
Subject: [PATCH 01/13] [flang][ACC] Implement cache directive lowering
The acc.cache operation is currently defined to be associated with a
loop. However, this implementation generates acc.cache as a standalone
data entry operation, similar to acc.private. The acc.cache operation
definition will be updated in a future change to reflect this usage.
Key implementation details:
- Add genCacheBounds() to generate acc.bounds for cache operands,
handling single elements (arr(i)), full ranges (arr(l:u)), and
partial ranges with missing bounds (arr(l:) or arr(:u))
- Generate acc.cache or acc.cache_readonly based on the readonly
modifier
- Update the symbol map so subsequent lowering uses the cache result
- Insert cache operations after loop iterator setup
---
flang/lib/Lower/Bridge.cpp | 12 ++-
flang/lib/Lower/OpenACC.cpp | 195 +++++++++++++++++++++++++++++++++++-
2 files changed, 203 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6c3631438a596..97bbda2db97a2 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3506,7 +3506,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
void genFIR(const Fortran::parser::OpenACCConstruct &acc) {
mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint();
- localSymbols.pushScope();
+
+ // Cache constructs should not push/pop a scope because they need to update
+ // the symbol map for subsequent statements in the same loop body.
+ bool isCacheConstruct =
+ std::holds_alternative<Fortran::parser::OpenACCCacheConstruct>(acc.u);
+
+ if (!isCacheConstruct)
+ localSymbols.pushScope();
mlir::Value exitCond = genOpenACCConstruct(
*this, bridge.getSemanticsContext(), getEval(), acc, localSymbols);
@@ -3605,7 +3612,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations())
genFIR(e);
}
- localSymbols.popScope();
+ if (!isCacheConstruct)
+ localSymbols.popScope();
builder->restoreInsertionPoint(insertPt);
if (accLoop && exitCond) {
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 52fee7baf9de1..a189736c56e40 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4837,12 +4837,203 @@ genACC(Fortran::lower::AbstractConverter &converter,
atomicConstruct.u);
}
+/// Generate acc.bounds for cache directive. Handles:
+/// - Single element: arr(i) or arr(5)
+/// - Full range: arr(lower:upper)
+/// - Missing upper: arr(lower:) - uses array's upper bound
+/// - Missing lower: arr(:upper) - uses array's lower bound
+static void
+genCacheBounds(Fortran::lower::AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ const Fortran::parser::AccObject &accObject,
+ std::stringstream &asFortran,
+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ mlir::Location loc = converter.getCurrentLocation();
+ mlir::Type idxTy = builder.getIndexType();
+ mlir::Type boundTy = builder.getType<mlir::acc::DataBoundsType>();
+
+ Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
+ Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
+
+ std::optional<Fortran::evaluate::DataRef> dataRef;
+ Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
+ [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
+ if (designator)
+ dataRef = Fortran::evaluate::ExtractDataRef(*designator);
+
+ if (!dataRef)
+ return;
+
+ auto *arrayRef = std::get_if<Fortran::evaluate::ArrayRef>(&dataRef->u);
+ if (!arrayRef)
+ return;
+
+ const auto &subscripts = arrayRef->subscript();
+ int dimension = 0;
+ mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+ fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(symbol);
+
+ for (const auto &subscript : subscripts) {
+ if (dimension != 0)
+ asFortran << ',';
+
+ mlir::Value lbound, extent;
+ mlir::Value arrayLb =
+ fir::factory::readLowerBound(builder, loc, dataExv, dimension, one);
+ mlir::Value arrayExtent =
+ fir::factory::readExtent(builder, loc, dataExv, dimension);
+
+ const auto *triplet = std::get_if<Fortran::evaluate::Triplet>(&subscript.u);
+
+ if (triplet) {
+ asFortran << ':';
+
+ // Compute lower bound (use array lb if not specified).
+ Fortran::semantics::MaybeExpr lowerSexpr =
+ Fortran::evaluate::AsGenericExpr(triplet->lower());
+ mlir::Value lb;
+ if (lowerSexpr) {
+ auto lowerConst = Fortran::evaluate::ToInt64(*lowerSexpr);
+ if (lowerConst) {
+ lb = builder.createIntegerConstant(loc, idxTy, *lowerConst);
+ } else {
+ lb = builder.createConvert(
+ loc, idxTy,
+ fir::getBase(converter.genExprValue(loc, *lowerSexpr, stmtCtx)));
+ }
+ } else {
+ lb = arrayLb;
+ }
+
+ // Compute upper bound (use array ub if not specified).
+ Fortran::semantics::MaybeExpr upperSexpr =
+ Fortran::evaluate::AsGenericExpr(triplet->upper());
+ mlir::Value ub;
+ if (upperSexpr) {
+ auto upperConst = Fortran::evaluate::ToInt64(*upperSexpr);
+ if (upperConst) {
+ ub = builder.createIntegerConstant(loc, idxTy, *upperConst);
+ } else {
+ ub = builder.createConvert(
+ loc, idxTy,
+ fir::getBase(converter.genExprValue(loc, *upperSexpr, stmtCtx)));
+ }
+ } else {
+ // arr(lower:) - upper is array's upper bound
+ ub = mlir::arith::AddIOp::create(
+ builder, loc,
+ mlir::arith::SubIOp::create(builder, loc, arrayLb, one),
+ arrayExtent);
+ }
+
+ // Normalize to zero-based and compute extent.
+ lbound = mlir::arith::SubIOp::create(builder, loc, lb, arrayLb);
+ mlir::Value ubound =
+ mlir::arith::SubIOp::create(builder, loc, ub, arrayLb);
+ extent = mlir::arith::AddIOp::create(
+ builder, loc,
+ mlir::arith::SubIOp::create(builder, loc, ubound, lbound), one);
+ } else {
+ // Single element: arr(elem)
+ using IndirectSubscriptIntegerExpr =
+ Fortran::evaluate::IndirectSubscriptIntegerExpr;
+ using SubscriptInteger = Fortran::evaluate::SubscriptInteger;
+ Fortran::evaluate::Expr<SubscriptInteger> scalarExpr =
+ std::get<IndirectSubscriptIntegerExpr>(subscript.u).value();
+ auto elemConst = Fortran::evaluate::ToInt64(scalarExpr);
+
+ mlir::Value elem;
+ if (elemConst) {
+ elem = builder.createIntegerConstant(loc, idxTy, *elemConst);
+ } else {
+ Fortran::semantics::SomeExpr sexpr =
+ Fortran::evaluate::AsGenericExpr(std::move(scalarExpr));
+ elem = builder.createConvert(
+ loc, idxTy,
+ fir::getBase(converter.genExprValue(loc, sexpr, stmtCtx)));
+ }
+
+ lbound = mlir::arith::SubIOp::create(builder, loc, elem, arrayLb);
+ extent = one;
+ }
+
+ mlir::Value bound = mlir::acc::DataBoundsOp::create(
+ builder, loc, boundTy, lbound, /*upperbound=*/mlir::Value{}, extent,
+ /*stride=*/one, /*strideInBytes=*/false, arrayLb);
+ bounds.push_back(bound);
+ ++dimension;
+ }
+}
+
static void
genACC(Fortran::lower::AbstractConverter &converter,
Fortran::semantics::SemanticsContext &semanticsContext,
const Fortran::parser::OpenACCCacheConstruct &cacheConstruct) {
- mlir::Location loc = converter.genLocation(cacheConstruct.source);
- TODO(loc, "OpenACC cache directive");
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+
+ // Find enclosing acc.loop
+ auto loopOp = builder.getRegion().getParentOfType<mlir::acc::LoopOp>();
+ if (!loopOp)
+ return;
+
+ // Set insertion point before terminator (after loop variable setup)
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ mlir::Block &loopBody = loopOp.getRegion().front();
+ builder.setInsertionPoint(loopBody.getTerminator());
+
+ const auto &objectListWithModifier =
+ std::get<Fortran::parser::AccObjectListWithModifier>(cacheConstruct.t);
+ const auto &accObjectList =
+ std::get<Fortran::parser::AccObjectList>(objectListWithModifier.t);
+ const auto &modifier =
+ std::get<std::optional<Fortran::parser::AccDataModifier>>(
+ objectListWithModifier.t);
+ mlir::acc::DataClause dataClause =
+ (modifier &&
+ (*modifier).v == Fortran::parser::AccDataModifier::Modifier::ReadOnly)
+ ? mlir::acc::DataClause::acc_cache_readonly
+ : mlir::acc::DataClause::acc_cache;
+
+ Fortran::lower::StatementContext stmtCtx;
+
+ for (const auto &accObject : accObjectList.v) {
+ mlir::Location operandLocation = genOperandLocation(converter, accObject);
+ Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
+
+ std::stringstream asFortran;
+ asFortran << symbol.name().ToString();
+
+ fir::factory::AddrAndBoundsInfo info = getDataOperandBaseAddr(
+ converter, builder, symbol, operandLocation, /*unwrapFirBox=*/true);
+ mlir::Value baseAddr = info.addr;
+
+ llvm::SmallVector<mlir::Value> bounds;
+ genCacheBounds(converter, semanticsContext, stmtCtx, accObject, asFortran,
+ bounds);
+
+ mlir::acc::CacheOp cacheOp = createDataEntryOp<mlir::acc::CacheOp>(
+ builder, operandLocation, baseAddr, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/false, dataClause,
+ baseAddr.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
+ /*unwrapBoxAddr=*/true, /*isPresent=*/mlir::Value{});
+
+ // Update symbol map so future lowering uses the cache result
+ Fortran::lower::SymMap &symbolMap = converter.getSymbolMap();
+ if (auto hostDef = symbolMap.lookupVariableDefinition(symbol)) {
+ // Clone the host declare with cache result as input
+ // The first operand is the memref/base for both hlfir::DeclareOp and
+ // fir::DeclareOp
+ mlir::Operation *hostDefOp = (*hostDef).getOperation();
+ mlir::IRMapping mapper;
+ mapper.map(hostDefOp->getOperand(0), cacheOp.getAccVar());
+ mlir::Operation *newDef = builder.clone(*hostDefOp, mapper);
+ symbolMap.addVariableDefinition(
+ symbol, llvm::cast<fir::FortranVariableOpInterface>(newDef));
+ }
+ }
}
mlir::Value Fortran::lower::genOpenACCConstruct(
>From f6e001269fbcdb7d0d4c0b8f105edb787990e2e6 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 7 Jan 2026 16:54:34 -0800
Subject: [PATCH 02/13] Update the acc-cache test
---
flang/lib/Lower/OpenACC.cpp | 14 +--
flang/test/Lower/OpenACC/Todo/acc-cache.f90 | 15 ---
flang/test/Lower/OpenACC/acc-cache.f90 | 113 ++++++++++++++++++++
3 files changed, 116 insertions(+), 26 deletions(-)
delete mode 100644 flang/test/Lower/OpenACC/Todo/acc-cache.f90
create mode 100644 flang/test/Lower/OpenACC/acc-cache.f90
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index a189736c56e40..101e699985532 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4899,9 +4899,7 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
if (lowerConst) {
lb = builder.createIntegerConstant(loc, idxTy, *lowerConst);
} else {
- lb = builder.createConvert(
- loc, idxTy,
- fir::getBase(converter.genExprValue(loc, *lowerSexpr, stmtCtx)));
+ mlir::emitError(loc, "unsupported OpenACC cache subscript");
}
} else {
lb = arrayLb;
@@ -4916,9 +4914,7 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
if (upperConst) {
ub = builder.createIntegerConstant(loc, idxTy, *upperConst);
} else {
- ub = builder.createConvert(
- loc, idxTy,
- fir::getBase(converter.genExprValue(loc, *upperSexpr, stmtCtx)));
+ mlir::emitError(loc, "unsupported OpenACC cache subscript");
}
} else {
// arr(lower:) - upper is array's upper bound
@@ -4948,11 +4944,7 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
if (elemConst) {
elem = builder.createIntegerConstant(loc, idxTy, *elemConst);
} else {
- Fortran::semantics::SomeExpr sexpr =
- Fortran::evaluate::AsGenericExpr(std::move(scalarExpr));
- elem = builder.createConvert(
- loc, idxTy,
- fir::getBase(converter.genExprValue(loc, sexpr, stmtCtx)));
+ mlir::emitError(loc, "unsupported OpenACC cache subscript");
}
lbound = mlir::arith::SubIOp::create(builder, loc, elem, arrayLb);
diff --git a/flang/test/Lower/OpenACC/Todo/acc-cache.f90 b/flang/test/Lower/OpenACC/Todo/acc-cache.f90
deleted file mode 100644
index 8b81e876ed2c9..0000000000000
--- a/flang/test/Lower/OpenACC/Todo/acc-cache.f90
+++ /dev/null
@@ -1,15 +0,0 @@
-! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %s -o - 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: OpenACC cache directive
-
-subroutine test_cache()
- integer, parameter :: n = 10
- real, dimension(n) :: a, b
- integer :: i
-
- !$acc loop
- do i = 1, n
- !$acc cache(b)
- a(i) = b(i)
- end do
-end subroutine
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
new file mode 100644
index 0000000000000..ce30f52d0c687
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -0,0 +1,113 @@
+! This test checks lowering of OpenACC cache directive.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: acc.private.recipe @privatization_ref_i32 : !fir.ref<i32> init {
+
+! CHECK-LABEL: func.func @_QPtest_cache_basic()
+subroutine test_cache_basic()
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b
+ integer :: i
+
+ !$acc loop
+ do i = 1, n
+ !$acc cache(b)
+ a(i) = b(i)
+ end do
+
+! CHECK: acc.loop
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b"
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_basicEb"}
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_readonly()
+subroutine test_cache_readonly()
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b
+ integer :: i
+
+ !$acc loop
+ do i = 1, n
+ !$acc cache(readonly: b)
+ a(i) = b(i)
+ end do
+
+! CHECK: acc.loop
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_cache_readonly>, name = "b"
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_readonlyEb"}
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_array_section()
+! For b(2:5): lowerbound = 2-1 = 1, extent = 5-2+1 = 4
+subroutine test_cache_array_section()
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b
+ integer :: i
+
+ !$acc loop
+ do i = 1, n
+ !$acc cache(b(2:5))
+ a(i) = b(i)
+ end do
+
+! CHECK: acc.loop
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! CHECK: %[[C2:.*]] = arith.constant 2 : index
+! CHECK: %[[C5:.*]] = arith.constant 5 : index
+! CHECK: %[[LB:.*]] = arith.subi %[[C2]], %[[C1]] : index
+! CHECK: %[[TMP1:.*]] = arith.subi %[[C5]], %[[C1]] : index
+! CHECK: %[[TMP2:.*]] = arith.subi %[[TMP1]], %[[LB]] : index
+! CHECK: %[[EXT:.*]] = arith.addi %[[TMP2]], %[[C1]] : index
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_array_sectionEb"}
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_multiple()
+subroutine test_cache_multiple()
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b, c
+ integer :: i
+
+ !$acc loop
+ do i = 1, n
+ !$acc cache(b, c)
+ a(i) = b(i) + c(i)
+ end do
+
+! CHECK: acc.loop
+! CHECK: %[[CACHE_B:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b"
+! CHECK: hlfir.declare %[[CACHE_B]](%{{.*}}) {uniq_name = "_QFtest_cache_multipleEb"}
+! CHECK: %[[CACHE_C:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "c"
+! CHECK: hlfir.declare %[[CACHE_C]](%{{.*}}) {uniq_name = "_QFtest_cache_multipleEc"}
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_2d_array()
+! For b(1:5, 1:5): each dimension has lowerbound = 0, extent = 5
+subroutine test_cache_2d_array()
+ integer, parameter :: n = 10
+ real, dimension(n, n) :: a, b
+ integer :: i, j
+
+ !$acc loop
+ do i = 1, n
+ !$acc cache(b(1:5, 1:5))
+ do j = 1, n
+ a(i,j) = b(i,j)
+ end do
+ end do
+
+! CHECK: acc.loop
+! Dimension 1: lowerbound = 1-1 = 0, extent = 5-1+1 = 5
+! CHECK: arith.constant 1 : index
+! CHECK: arith.constant 5 : index
+! CHECK: arith.subi
+! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! Dimension 2: lowerbound = 1-1 = 0, extent = 5-1+1 = 5
+! CHECK: arith.constant 5 : index
+! CHECK: arith.subi
+! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND1]], %[[BOUND2]]) -> !fir.ref<!fir.array<10x10xf32>> {{{.*}}name = "b
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_2d_arrayEb"}
+end subroutine
>From 0929d7f49a90535cd3b6d2f4b003c219ddce53ce Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 7 Jan 2026 18:12:04 -0800
Subject: [PATCH 03/13] Support iterators in bounds
---
flang/lib/Lower/OpenACC.cpp | 54 ++++++++++----------------
flang/test/Lower/OpenACC/acc-cache.f90 | 24 ++++++++++++
2 files changed, 45 insertions(+), 33 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 101e699985532..a285707ea6e85 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4870,6 +4870,16 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
if (!arrayRef)
return;
+ // Helper to generate index value from expression.
+ // Optimize for compile-time constants to generate index type directly.
+ auto genIndex =
+ [&](const Fortran::semantics::MaybeExpr &expr) -> mlir::Value {
+ if (auto constVal = Fortran::evaluate::ToInt64(*expr))
+ return builder.createIntegerConstant(loc, idxTy, *constVal);
+ return builder.createConvert(
+ loc, idxTy, fir::getBase(converter.genExprValue(loc, *expr, stmtCtx)));
+ };
+
const auto &subscripts = arrayRef->subscript();
int dimension = 0;
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
@@ -4891,31 +4901,16 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
asFortran << ':';
// Compute lower bound (use array lb if not specified).
- Fortran::semantics::MaybeExpr lowerSexpr =
+ Fortran::semantics::MaybeExpr lowerExpr =
Fortran::evaluate::AsGenericExpr(triplet->lower());
- mlir::Value lb;
- if (lowerSexpr) {
- auto lowerConst = Fortran::evaluate::ToInt64(*lowerSexpr);
- if (lowerConst) {
- lb = builder.createIntegerConstant(loc, idxTy, *lowerConst);
- } else {
- mlir::emitError(loc, "unsupported OpenACC cache subscript");
- }
- } else {
- lb = arrayLb;
- }
+ mlir::Value lb = lowerExpr ? genIndex(lowerExpr) : arrayLb;
// Compute upper bound (use array ub if not specified).
- Fortran::semantics::MaybeExpr upperSexpr =
+ Fortran::semantics::MaybeExpr upperExpr =
Fortran::evaluate::AsGenericExpr(triplet->upper());
mlir::Value ub;
- if (upperSexpr) {
- auto upperConst = Fortran::evaluate::ToInt64(*upperSexpr);
- if (upperConst) {
- ub = builder.createIntegerConstant(loc, idxTy, *upperConst);
- } else {
- mlir::emitError(loc, "unsupported OpenACC cache subscript");
- }
+ if (upperExpr) {
+ ub = genIndex(upperExpr);
} else {
// arr(lower:) - upper is array's upper bound
ub = mlir::arith::AddIOp::create(
@@ -4933,19 +4928,12 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
mlir::arith::SubIOp::create(builder, loc, ubound, lbound), one);
} else {
// Single element: arr(elem)
- using IndirectSubscriptIntegerExpr =
- Fortran::evaluate::IndirectSubscriptIntegerExpr;
- using SubscriptInteger = Fortran::evaluate::SubscriptInteger;
- Fortran::evaluate::Expr<SubscriptInteger> scalarExpr =
- std::get<IndirectSubscriptIntegerExpr>(subscript.u).value();
- auto elemConst = Fortran::evaluate::ToInt64(scalarExpr);
-
- mlir::Value elem;
- if (elemConst) {
- elem = builder.createIntegerConstant(loc, idxTy, *elemConst);
- } else {
- mlir::emitError(loc, "unsupported OpenACC cache subscript");
- }
+ Fortran::evaluate::Expr<Fortran::evaluate::SubscriptInteger> scalarExpr =
+ std::get<Fortran::evaluate::IndirectSubscriptIntegerExpr>(subscript.u)
+ .value();
+ Fortran::semantics::MaybeExpr elemExpr =
+ Fortran::evaluate::AsGenericExpr(std::move(scalarExpr));
+ mlir::Value elem = genIndex(elemExpr);
lbound = mlir::arith::SubIOp::create(builder, loc, elem, arrayLb);
extent = one;
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index ce30f52d0c687..cdf643c2128b6 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -111,3 +111,27 @@ subroutine test_cache_2d_array()
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND1]], %[[BOUND2]]) -> !fir.ref<!fir.array<10x10xf32>> {{{.*}}name = "b
! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_2d_arrayEb"}
end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_loop_var()
+! Test cache with loop variable dependent bounds: b(i:i+2)
+subroutine test_cache_loop_var()
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b
+ integer :: i
+
+ !$acc loop
+ do i = 1, n-2
+ !$acc cache(b(i:i+2))
+ a(i) = b(i) + b(i+1) + b(i+2)
+ end do
+
+! CHECK: acc.loop
+! CHECK: fir.load
+! CHECK: fir.convert
+! CHECK: fir.load
+! CHECK: arith.addi
+! CHECK: fir.convert
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_loop_varEb"}
+end subroutine
>From df7dfce343e82d130ba42fb2342a7e17a53f1789 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 7 Jan 2026 18:44:15 -0800
Subject: [PATCH 04/13] Fix for nested loops
---
flang/lib/Lower/OpenACC.cpp | 7 +-
flang/test/Lower/OpenACC/acc-cache.f90 | 118 ++++++++++++++++++++++---
2 files changed, 107 insertions(+), 18 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index a285707ea6e85..c8d99f1106249 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4953,16 +4953,11 @@ genACC(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::OpenACCCacheConstruct &cacheConstruct) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
- // Find enclosing acc.loop
+ // Verify we're inside an acc.loop region.
auto loopOp = builder.getRegion().getParentOfType<mlir::acc::LoopOp>();
if (!loopOp)
return;
- // Set insertion point before terminator (after loop variable setup)
- mlir::OpBuilder::InsertionGuard guard(builder);
- mlir::Block &loopBody = loopOp.getRegion().front();
- builder.setInsertionPoint(loopBody.getTerminator());
-
const auto &objectListWithModifier =
std::get<Fortran::parser::AccObjectListWithModifier>(cacheConstruct.t);
const auto &accObjectList =
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index cdf643c2128b6..84f8de4ebc714 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -99,15 +99,20 @@ subroutine test_cache_2d_array()
end do
! CHECK: acc.loop
-! Dimension 1: lowerbound = 1-1 = 0, extent = 5-1+1 = 5
-! CHECK: arith.constant 1 : index
-! CHECK: arith.constant 5 : index
+! CHECK-DAG: arith.constant 1 : index
+! CHECK-DAG: arith.constant 5 : index
+! Dimension 1: lowerbound = 1-1 = 0, extent = 5-0+1 = 5
+! CHECK: %[[LB1:.*]] = arith.subi %{{.*}}, %{{.*}} : index
+! CHECK: arith.subi
+! CHECK: arith.subi
+! CHECK: arith.addi
+! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB1]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! Dimension 2: lowerbound = 1-1 = 0, extent = 5-0+1 = 5
+! CHECK: %[[LB2:.*]] = arith.subi %{{.*}}, %{{.*}} : index
! CHECK: arith.subi
-! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
-! Dimension 2: lowerbound = 1-1 = 0, extent = 5-1+1 = 5
-! CHECK: arith.constant 5 : index
! CHECK: arith.subi
-! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! CHECK: arith.addi
+! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB2]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND1]], %[[BOUND2]]) -> !fir.ref<!fir.array<10x10xf32>> {{{.*}}name = "b
! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_2d_arrayEb"}
end subroutine
@@ -126,12 +131,101 @@ subroutine test_cache_loop_var()
end do
! CHECK: acc.loop
-! CHECK: fir.load
-! CHECK: fir.convert
-! CHECK: fir.load
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! b(i:i+2): lowerbound = i-1, extent = (i+2)-(i)+1 = 3
+! CHECK: fir.convert %{{.*}} : (i64) -> index
! CHECK: arith.addi
-! CHECK: fir.convert
-! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! CHECK: fir.convert %{{.*}} : (i64) -> index
+! CHECK: %[[LB:.*]] = arith.subi %{{.*}}, %[[C1]] : index
+! CHECK: %[[UB:.*]] = arith.subi %{{.*}}, %[[C1]] : index
+! CHECK: %[[TMP:.*]] = arith.subi %[[UB]], %[[LB]] : index
+! CHECK: %[[EXT:.*]] = arith.addi %[[TMP]], %[[C1]] : index
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_loop_varEb"}
end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_2d_loop_vars()
+! Test 2D cache with swapped loop variables inside nested loop: b(j:j+1, i:i+1)
+subroutine test_cache_2d_loop_vars()
+ integer, parameter :: n = 10
+ real, dimension(n, n) :: a, b
+ integer :: i, j
+
+ !$acc loop
+ do i = 1, n-1
+ do j = 1, n-1
+ !$acc cache(b(j:j+1, i:i+1))
+ a(i,j) = b(j,i) + b(j+1,i+1)
+ end do
+ end do
+
+! CHECK: acc.loop
+! The cache is generated inside fir.do_loop (the inner j loop)
+! CHECK: fir.do_loop
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! Dimension 1: j to j+1, extent = 2
+! CHECK: %[[LB1:.*]] = arith.subi %{{.*}}, %[[C1]] : index
+! CHECK: arith.subi
+! CHECK: arith.subi
+! CHECK: arith.addi
+! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB1]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! Dimension 2: i to i+1, extent = 2
+! CHECK: %[[LB2:.*]] = arith.subi %{{.*}}, %[[C1]] : index
+! CHECK: arith.subi
+! CHECK: arith.subi
+! CHECK: arith.addi
+! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB2]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND1]], %[[BOUND2]]) -> !fir.ref<!fir.array<10x10xf32>> {{{.*}}name = "b
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_2d_loop_varsEb"}
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_single_element()
+! Test cache with single element access: b(i)
+subroutine test_cache_single_element()
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b
+ integer :: i
+
+ !$acc loop
+ do i = 1, n
+ !$acc cache(b(i))
+ a(i) = b(i)
+ end do
+
+! CHECK: acc.loop
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! Single element b(i): lowerbound = i-1, extent = 1
+! CHECK: %[[I_IDX:.*]] = fir.convert %{{.*}} : (i64) -> index
+! CHECK: %[[LB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_single_elementEb"}
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_mixed_bounds()
+! Test cache with mixed constant and variable bounds: b(1:i)
+subroutine test_cache_mixed_bounds()
+ integer, parameter :: n = 10
+ real, dimension(n) :: a, b
+ integer :: i
+
+ !$acc loop
+ do i = 1, n
+ !$acc cache(b(1:i))
+ a(i) = b(i)
+ end do
+
+! CHECK: acc.loop
+! CHECK: arith.constant 1 : index
+! CHECK: arith.constant 1 : index
+! b(1:i): lowerbound = 1-1 = 0, extent = (i-1) - 0 + 1 = i
+! CHECK: fir.convert %{{.*}} : (i64) -> index
+! CHECK: %[[LB:.*]] = arith.subi %{{.*}}, %{{.*}} : index
+! CHECK: %[[UB:.*]] = arith.subi %{{.*}}, %{{.*}} : index
+! CHECK: %[[TMP:.*]] = arith.subi %[[UB]], %[[LB]] : index
+! CHECK: %[[EXT:.*]] = arith.addi %[[TMP]], %{{.*}} : index
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_mixed_boundsEb"}
+end subroutine
>From d4c713f7ab2752a611a5080dc7e2656da5153f6f Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 7 Jan 2026 19:50:17 -0800
Subject: [PATCH 05/13] Improve acc-cache.f90 tests for iterator bounds
verification
---
flang/test/Lower/OpenACC/acc-cache.f90 | 133 +++++++++++++++++--------
1 file changed, 92 insertions(+), 41 deletions(-)
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index 84f8de4ebc714..67af65ee10cfa 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -39,7 +39,7 @@ subroutine test_cache_readonly()
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_array_section()
-! For b(2:5): lowerbound = 2-1 = 1, extent = 5-2+1 = 4
+! For b(2:5) with startIdx=1: lowerbound = 2-1 = 1, upperbound = 5-1 = 4, extent = 4
subroutine test_cache_array_section()
integer, parameter :: n = 10
real, dimension(n) :: a, b
@@ -84,7 +84,7 @@ subroutine test_cache_multiple()
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_2d_array()
-! For b(1:5, 1:5): each dimension has lowerbound = 0, extent = 5
+! For b(1:5, 1:5) with startIdx=1: each dimension has lowerbound = 1-1 = 0, upperbound = 5-1 = 4, extent = 5
subroutine test_cache_2d_array()
integer, parameter :: n = 10
real, dimension(n, n) :: a, b
@@ -101,13 +101,13 @@ subroutine test_cache_2d_array()
! CHECK: acc.loop
! CHECK-DAG: arith.constant 1 : index
! CHECK-DAG: arith.constant 5 : index
-! Dimension 1: lowerbound = 1-1 = 0, extent = 5-0+1 = 5
+! Dimension 1: lowerbound = 1 - startIdx = 0, upperbound = 5 - startIdx = 4, extent = 5
! CHECK: %[[LB1:.*]] = arith.subi %{{.*}}, %{{.*}} : index
! CHECK: arith.subi
! CHECK: arith.subi
! CHECK: arith.addi
! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB1]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
-! Dimension 2: lowerbound = 1-1 = 0, extent = 5-0+1 = 5
+! Dimension 2: lowerbound = 1 - startIdx = 0, upperbound = 5 - startIdx = 4, extent = 5
! CHECK: %[[LB2:.*]] = arith.subi %{{.*}}, %{{.*}} : index
! CHECK: arith.subi
! CHECK: arith.subi
@@ -130,16 +130,29 @@ subroutine test_cache_loop_var()
a(i) = b(i) + b(i+1) + b(i+2)
end do
-! CHECK: acc.loop
+! CHECK: acc.loop private({{.*}}) control(%[[IV:.*]] : i32) = ({{.*}}) to ({{.*}})
+! The privatized iterator is declared and initialized from the loop control variable
+! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_loop_varEi"}
+! CHECK: fir.store %[[IV]] to %[[I_DECL]]#0 : !fir.ref<i32>
+! Bounds generation loads the iterator and converts it to index
! CHECK: %[[C1:.*]] = arith.constant 1 : index
-! b(i:i+2): lowerbound = i-1, extent = (i+2)-(i)+1 = 3
-! CHECK: fir.convert %{{.*}} : (i64) -> index
-! CHECK: arith.addi
-! CHECK: fir.convert %{{.*}} : (i64) -> index
-! CHECK: %[[LB:.*]] = arith.subi %{{.*}}, %[[C1]] : index
-! CHECK: %[[UB:.*]] = arith.subi %{{.*}}, %[[C1]] : index
-! CHECK: %[[TMP:.*]] = arith.subi %[[UB]], %[[LB]] : index
-! CHECK: %[[EXT:.*]] = arith.addi %[[TMP]], %[[C1]] : index
+! Load i for lower bound (i)
+! CHECK: %[[I_LOAD1:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[I_I64_1:.*]] = fir.convert %[[I_LOAD1]] : (i32) -> i64
+! CHECK: %[[I_IDX_1:.*]] = fir.convert %[[I_I64_1]] : (i64) -> index
+! Load i for upper bound (i+2)
+! CHECK: %[[I_LOAD2:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[C2_I32:.*]] = arith.constant 2 : i32
+! CHECK: %[[I_PLUS_2:.*]] = arith.addi %[[I_LOAD2]], %[[C2_I32]] : i32
+! CHECK: %[[UB_I64:.*]] = fir.convert %[[I_PLUS_2]] : (i32) -> i64
+! CHECK: %[[UB_IDX:.*]] = fir.convert %[[UB_I64]] : (i64) -> index
+! Compute lowerbound = i - startIdx (offset from startIdx)
+! CHECK: %[[LB:.*]] = arith.subi %[[I_IDX_1]], %[[C1]] : index
+! Compute upperbound = (i+2) - startIdx (offset from startIdx)
+! CHECK: %[[UB:.*]] = arith.subi %[[UB_IDX]], %[[C1]] : index
+! Compute extent = ub - lb + 1
+! CHECK: %[[DIFF:.*]] = arith.subi %[[UB]], %[[LB]] : index
+! CHECK: %[[EXT:.*]] = arith.addi %[[DIFF]], %[[C1]] : index
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_loop_varEb"}
@@ -160,22 +173,45 @@ subroutine test_cache_2d_loop_vars()
end do
end do
-! CHECK: acc.loop
-! The cache is generated inside fir.do_loop (the inner j loop)
-! CHECK: fir.do_loop
+! CHECK: acc.loop private({{.*}}) control(%[[I_IV:.*]] : i32) = ({{.*}}) to ({{.*}})
+! Outer loop iterator i is stored to privatized variable
+! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_2d_loop_varsEi"}
+! CHECK: fir.store %[[I_IV]] to %[[I_DECL]]#0 : !fir.ref<i32>
+! Inner loop j (non-acc loop, fir.do_loop)
+! CHECK: fir.do_loop %[[J_IV:.*]] = {{.*}} iter_args(%[[J_ITER:.*]] = {{.*}})
+! Inner loop iterator j is stored to j variable
+! CHECK: fir.store %[[J_ITER]] to %[[J_REF:.*]] : !fir.ref<i32>
! CHECK: %[[C1:.*]] = arith.constant 1 : index
-! Dimension 1: j to j+1, extent = 2
-! CHECK: %[[LB1:.*]] = arith.subi %{{.*}}, %[[C1]] : index
-! CHECK: arith.subi
-! CHECK: arith.subi
-! CHECK: arith.addi
-! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB1]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
-! Dimension 2: i to i+1, extent = 2
-! CHECK: %[[LB2:.*]] = arith.subi %{{.*}}, %[[C1]] : index
-! CHECK: arith.subi
-! CHECK: arith.subi
-! CHECK: arith.addi
-! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB2]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! Dimension 1 bounds from j: load j, convert, compute j+1
+! CHECK: %[[J_LOAD1:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
+! CHECK: %[[J_I64_1:.*]] = fir.convert %[[J_LOAD1]] : (i32) -> i64
+! CHECK: %[[J_IDX_1:.*]] = fir.convert %[[J_I64_1]] : (i64) -> index
+! CHECK: %[[J_LOAD2:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
+! CHECK: %[[C1_I32_J:.*]] = arith.constant 1 : i32
+! CHECK: %[[J_PLUS_1:.*]] = arith.addi %[[J_LOAD2]], %[[C1_I32_J]] : i32
+! CHECK: %[[J_PLUS_1_I64:.*]] = fir.convert %[[J_PLUS_1]] : (i32) -> i64
+! CHECK: %[[J_PLUS_1_IDX:.*]] = fir.convert %[[J_PLUS_1_I64]] : (i64) -> index
+! Compute lowerbound = j - 1, upperbound = (j+1) - 1, extent = 2
+! CHECK: %[[LB1:.*]] = arith.subi %[[J_IDX_1]], %[[C1]] : index
+! CHECK: %[[UB1:.*]] = arith.subi %[[J_PLUS_1_IDX]], %[[C1]] : index
+! CHECK: %[[DIFF1:.*]] = arith.subi %[[UB1]], %[[LB1]] : index
+! CHECK: %[[EXT1:.*]] = arith.addi %[[DIFF1]], %[[C1]] : index
+! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB1]] : index) extent(%[[EXT1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
+! Dimension 2 bounds from i (outer loop): load i, convert, compute i+1
+! CHECK: %[[I_LOAD1:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[I_I64_1:.*]] = fir.convert %[[I_LOAD1]] : (i32) -> i64
+! CHECK: %[[I_IDX_1:.*]] = fir.convert %[[I_I64_1]] : (i64) -> index
+! CHECK: %[[I_LOAD2:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[C1_I32_I:.*]] = arith.constant 1 : i32
+! CHECK: %[[I_PLUS_1:.*]] = arith.addi %[[I_LOAD2]], %[[C1_I32_I]] : i32
+! CHECK: %[[I_PLUS_1_I64:.*]] = fir.convert %[[I_PLUS_1]] : (i32) -> i64
+! CHECK: %[[I_PLUS_1_IDX:.*]] = fir.convert %[[I_PLUS_1_I64]] : (i64) -> index
+! Compute lowerbound = i - 1, upperbound = (i+1) - 1, extent = 2
+! CHECK: %[[LB2:.*]] = arith.subi %[[I_IDX_1]], %[[C1]] : index
+! CHECK: %[[UB2:.*]] = arith.subi %[[I_PLUS_1_IDX]], %[[C1]] : index
+! CHECK: %[[DIFF2:.*]] = arith.subi %[[UB2]], %[[LB2]] : index
+! CHECK: %[[EXT2:.*]] = arith.addi %[[DIFF2]], %[[C1]] : index
+! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB2]] : index) extent(%[[EXT2]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND1]], %[[BOUND2]]) -> !fir.ref<!fir.array<10x10xf32>> {{{.*}}name = "b
! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_2d_loop_varsEb"}
end subroutine
@@ -193,10 +229,16 @@ subroutine test_cache_single_element()
a(i) = b(i)
end do
-! CHECK: acc.loop
+! CHECK: acc.loop private({{.*}}) control(%[[IV:.*]] : i32) = ({{.*}}) to ({{.*}})
+! The privatized iterator is declared and initialized from the loop control variable
+! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_single_elementEi"}
+! CHECK: fir.store %[[IV]] to %[[I_DECL]]#0 : !fir.ref<i32>
! CHECK: %[[C1:.*]] = arith.constant 1 : index
-! Single element b(i): lowerbound = i-1, extent = 1
-! CHECK: %[[I_IDX:.*]] = fir.convert %{{.*}} : (i64) -> index
+! Load i from the iterator variable and convert to index
+! CHECK: %[[I_LOAD:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[I_I64:.*]] = fir.convert %[[I_LOAD]] : (i32) -> i64
+! CHECK: %[[I_IDX:.*]] = fir.convert %[[I_I64]] : (i64) -> index
+! Compute lowerbound = i - startIdx (offset from startIdx), extent = 1 for single element
! CHECK: %[[LB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
@@ -216,16 +258,25 @@ subroutine test_cache_mixed_bounds()
a(i) = b(i)
end do
-! CHECK: acc.loop
-! CHECK: arith.constant 1 : index
-! CHECK: arith.constant 1 : index
-! b(1:i): lowerbound = 1-1 = 0, extent = (i-1) - 0 + 1 = i
-! CHECK: fir.convert %{{.*}} : (i64) -> index
-! CHECK: %[[LB:.*]] = arith.subi %{{.*}}, %{{.*}} : index
-! CHECK: %[[UB:.*]] = arith.subi %{{.*}}, %{{.*}} : index
-! CHECK: %[[TMP:.*]] = arith.subi %[[UB]], %[[LB]] : index
-! CHECK: %[[EXT:.*]] = arith.addi %[[TMP]], %{{.*}} : index
-! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
+! CHECK: acc.loop private({{.*}}) control(%[[IV:.*]] : i32) = ({{.*}}) to ({{.*}})
+! The privatized iterator is declared and initialized
+! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_cache_mixed_boundsEi"}
+! CHECK: fir.store %[[IV]] to %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! b(1:i): lower bound is constant 1
+! CHECK: %[[C1_LB:.*]] = arith.constant 1 : index
+! Upper bound i is loaded from iterator variable
+! CHECK: %[[I_LOAD:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[I_I64:.*]] = fir.convert %[[I_LOAD]] : (i32) -> i64
+! CHECK: %[[I_IDX:.*]] = fir.convert %[[I_I64]] : (i64) -> index
+! Compute lowerbound = 1 - startIdx = 0 (constant offset)
+! CHECK: %[[LB:.*]] = arith.subi %[[C1_LB]], %[[C1]] : index
+! Compute upperbound = i - startIdx (offset from startIdx, uses iterator)
+! CHECK: %[[UB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
+! Compute extent = ub - lb + 1 = i (dynamic based on iterator)
+! CHECK: %[[DIFF:.*]] = arith.subi %[[UB]], %[[LB]] : index
+! CHECK: %[[EXT:.*]] = arith.addi %[[DIFF]], %[[C1]] : index
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_mixed_boundsEb"}
end subroutine
>From a2f99c282e2930df05f9f3accafb82fc092e9b0d Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 7 Jan 2026 20:19:25 -0800
Subject: [PATCH 06/13] Detect invalid bounds
---
flang/lib/Lower/OpenACC.cpp | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index c8d99f1106249..f88b42b530ffd 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4900,14 +4900,30 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
if (triplet) {
asFortran << ':';
- // Compute lower bound (use array lb if not specified).
+ // OpenACC spec requires at least one of lower or upper bound to be
+ // specified: arr(lower:upper), arr(lower:), or arr(:upper).
+ // arr(:) with both bounds missing is not allowed.
Fortran::semantics::MaybeExpr lowerExpr =
Fortran::evaluate::AsGenericExpr(triplet->lower());
+ Fortran::semantics::MaybeExpr upperExpr =
+ Fortran::evaluate::AsGenericExpr(triplet->upper());
+
+ if (!lowerExpr && !upperExpr) {
+ mlir::emitError(loc, "OpenACC cache directive requires at least one "
+ "bound to be specified for array section");
+ }
+
+ // OpenACC cache does not support strided array sections.
+ if (auto strideVal = Fortran::evaluate::ToInt64(triplet->stride())) {
+ if (*strideVal != 1)
+ mlir::emitError(loc, "OpenACC cache directive does not support "
+ "strided array sections");
+ }
+
+ // Compute lower bound (use array lb if not specified).
mlir::Value lb = lowerExpr ? genIndex(lowerExpr) : arrayLb;
// Compute upper bound (use array ub if not specified).
- Fortran::semantics::MaybeExpr upperExpr =
- Fortran::evaluate::AsGenericExpr(triplet->upper());
mlir::Value ub;
if (upperExpr) {
ub = genIndex(upperExpr);
>From db32034461360fdd29edad98ec9aed4809a14a74 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Wed, 7 Jan 2026 20:37:45 -0800
Subject: [PATCH 07/13] Use report_fatal_error
---
flang/lib/Lower/OpenACC.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index f88b42b530ffd..472c844130d30 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4909,15 +4909,15 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
Fortran::evaluate::AsGenericExpr(triplet->upper());
if (!lowerExpr && !upperExpr) {
- mlir::emitError(loc, "OpenACC cache directive requires at least one "
- "bound to be specified for array section");
+ llvm::report_fatal_error("OpenACC cache directive requires at least "
+ "one bound to be specified for array section");
}
- // OpenACC cache does not support strided array sections.
- if (auto strideVal = Fortran::evaluate::ToInt64(triplet->stride())) {
- if (*strideVal != 1)
- mlir::emitError(loc, "OpenACC cache directive does not support "
- "strided array sections");
+ // OpenACC cache only supports unit stride (default or explicit 1).
+ auto strideVal = Fortran::evaluate::ToInt64(triplet->stride());
+ if (!strideVal || *strideVal != 1) {
+ llvm::report_fatal_error("OpenACC cache directive does not support "
+ "strided array sections");
}
// Compute lower bound (use array lb if not specified).
>From a05af8b1ad951405b6623f8c1517d50f245df6be Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 8 Jan 2026 00:40:02 -0800
Subject: [PATCH 08/13] Support non-1 lower bounds; disregard fir::DeclareOp
---
flang/lib/Lower/OpenACC.cpp | 34 ++++++++++++++++----------
flang/test/Lower/OpenACC/acc-cache.f90 | 27 ++++++++++++++++++++
2 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 472c844130d30..b307a0f2ce932 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4998,7 +4998,14 @@ genACC(Fortran::lower::AbstractConverter &converter,
fir::factory::AddrAndBoundsInfo info = getDataOperandBaseAddr(
converter, builder, symbol, operandLocation, /*unwrapFirBox=*/true);
- mlir::Value baseAddr = info.addr;
+
+ // For arrays with non-1 lower bounds, info.addr is a box type.
+ // Use rawInput (the underlying ref) when addr and rawInput have different
+ // element types, similar to how other data clauses handle this case.
+ bool useRawInput =
+ info.rawInput && fir::unwrapRefType(info.addr.getType()) !=
+ fir::unwrapRefType(info.rawInput.getType());
+ mlir::Value baseAddr = useRawInput ? info.rawInput : info.addr;
llvm::SmallVector<mlir::Value> bounds;
genCacheBounds(converter, semanticsContext, stmtCtx, accObject, asFortran,
@@ -5011,19 +5018,20 @@ genACC(Fortran::lower::AbstractConverter &converter,
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
/*unwrapBoxAddr=*/true, /*isPresent=*/mlir::Value{});
- // Update symbol map so future lowering uses the cache result
+ // Update symbol map so future lowering uses the cache result.
Fortran::lower::SymMap &symbolMap = converter.getSymbolMap();
- if (auto hostDef = symbolMap.lookupVariableDefinition(symbol)) {
- // Clone the host declare with cache result as input
- // The first operand is the memref/base for both hlfir::DeclareOp and
- // fir::DeclareOp
- mlir::Operation *hostDefOp = (*hostDef).getOperation();
- mlir::IRMapping mapper;
- mapper.map(hostDefOp->getOperand(0), cacheOp.getAccVar());
- mlir::Operation *newDef = builder.clone(*hostDefOp, mapper);
- symbolMap.addVariableDefinition(
- symbol, llvm::cast<fir::FortranVariableOpInterface>(newDef));
- }
+ std::optional<fir::FortranVariableOpInterface> hostDef =
+ symbolMap.lookupVariableDefinition(symbol);
+ assert(hostDef.has_value() && llvm::isa<hlfir::DeclareOp>(*hostDef) &&
+ "expected symbol to be mapped to hlfir.declare");
+ auto hostDeclare = llvm::cast<hlfir::DeclareOp>(*hostDef);
+ // Clone the host declare with cache result as input.
+ mlir::IRMapping mapper;
+ mapper.map(hostDeclare.getMemref(), cacheOp.getAccVar());
+ mlir::Operation *newDef =
+ builder.clone(*hostDeclare.getOperation(), mapper);
+ symbolMap.addVariableDefinition(
+ symbol, llvm::cast<fir::FortranVariableOpInterface>(newDef));
}
}
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index 67af65ee10cfa..10a36f22d1b14 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -280,3 +280,30 @@ subroutine test_cache_mixed_bounds()
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_mixed_boundsEb"}
end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_cache_nonunit_lb()
+! Test cache with array that has non-1 lower bound: arr(10:20), cache(arr(15))
+subroutine test_cache_nonunit_lb()
+ integer :: arr(10:20)
+ integer :: i
+
+ !$acc loop
+ do i = 10, 20
+ !$acc cache(arr(15))
+ arr(i) = i
+ end do
+
+! For arr(10:20), startIdx = 10, element 15 has lowerbound = 15 - 10 = 5
+! CHECK: %[[C10:.*]] = arith.constant 10 : index
+! CHECK: acc.loop
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! CHECK: %[[C15:.*]] = arith.constant 15 : index
+! Compute lowerbound = 15 - startIdx = 15 - 10 = 5
+! CHECK: %[[LB:.*]] = arith.subi %[[C15]], %[[C10]] : index
+! Single element has extent = 1
+! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C10]] : index)
+! The varPtr uses the ref type (second result of hlfir.declare with shapeshift)
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<11xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<11xi32>> {{{.*}}name = "arr
+! The cloned declare produces a box and ref pair
+! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_nonunit_lbEarr"} : (!fir.ref<!fir.array<11xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<11xi32>>, !fir.ref<!fir.array<11xi32>>)
+end subroutine
>From ba3acb857504fc6fd58ea2c890306111f157ef63 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 8 Jan 2026 09:21:28 -0800
Subject: [PATCH 09/13] Bypass the hlfir.declare generation after acc.cache
---
.../OpenACC/Support/FIROpenACCOpsInterfaces.h | 20 ++++++++
flang/lib/Lower/OpenACC.cpp | 50 ++++++-------------
.../Support/RegisterOpenACCExtensions.cpp | 7 +++
flang/test/Lower/OpenACC/acc-cache.f90 | 18 ++-----
4 files changed, 45 insertions(+), 50 deletions(-)
diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
index b017cb4733b6c..94eeb9d06d334 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
@@ -13,6 +13,7 @@
#ifndef FLANG_OPTIMIZER_OPENACC_FIROPENACC_OPS_INTERFACES_H_
#define FLANG_OPTIMIZER_OPENACC_FIROPENACC_OPS_INTERFACES_H_
+#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
namespace fir {
@@ -86,6 +87,25 @@ struct OutlineRematerializationModel
: public mlir::acc::OutlineRematerializationOpInterface::ExternalModel<
OutlineRematerializationModel<Op>, Op> {};
+/// External model to implement FortranVariableOpInterface for acc::CacheOp.
+struct CacheFortranVariableModel
+ : public fir::FortranVariableOpInterface::ExternalModel<
+ CacheFortranVariableModel, mlir::acc::CacheOp> {
+ mlir::Value getBase(mlir::Operation *op) const {
+ return mlir::cast<mlir::acc::CacheOp>(op).getAccVar();
+ }
+ std::optional<fir::FortranVariableFlagsEnum>
+ getFortranAttrs(mlir::Operation *) const {
+ return std::nullopt;
+ }
+ mlir::Value getShape(mlir::Operation *) const { return nullptr; }
+ mlir::OperandRange getExplicitTypeParams(mlir::Operation *op) const {
+ return mlir::cast<mlir::acc::CacheOp>(op).getBounds().take_front(0);
+ }
+ void setFortranAttrs(mlir::Operation *, fir::FortranVariableFlagsEnum) const {
+ }
+};
+
} // namespace fir::acc
#endif // FLANG_OPTIMIZER_OPENACC_FIROPENACC_OPS_INTERFACES_H_
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index b307a0f2ce932..52cd1c34fce6c 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4967,13 +4967,6 @@ static void
genACC(Fortran::lower::AbstractConverter &converter,
Fortran::semantics::SemanticsContext &semanticsContext,
const Fortran::parser::OpenACCCacheConstruct &cacheConstruct) {
- fir::FirOpBuilder &builder = converter.getFirOpBuilder();
-
- // Verify we're inside an acc.loop region.
- auto loopOp = builder.getRegion().getParentOfType<mlir::acc::LoopOp>();
- if (!loopOp)
- return;
-
const auto &objectListWithModifier =
std::get<Fortran::parser::AccObjectListWithModifier>(cacheConstruct.t);
const auto &accObjectList =
@@ -4996,42 +4989,27 @@ genACC(Fortran::lower::AbstractConverter &converter,
std::stringstream asFortran;
asFortran << symbol.name().ToString();
- fir::factory::AddrAndBoundsInfo info = getDataOperandBaseAddr(
- converter, builder, symbol, operandLocation, /*unwrapFirBox=*/true);
-
- // For arrays with non-1 lower bounds, info.addr is a box type.
- // Use rawInput (the underlying ref) when addr and rawInput have different
- // element types, similar to how other data clauses handle this case.
- bool useRawInput =
- info.rawInput && fir::unwrapRefType(info.addr.getType()) !=
- fir::unwrapRefType(info.rawInput.getType());
- mlir::Value baseAddr = useRawInput ? info.rawInput : info.addr;
-
llvm::SmallVector<mlir::Value> bounds;
genCacheBounds(converter, semanticsContext, stmtCtx, accObject, asFortran,
bounds);
+ std::optional<fir::FortranVariableOpInterface> varDef =
+ converter.getSymbolMap().lookupVariableDefinition(symbol);
+ assert(varDef.has_value() && llvm::isa<hlfir::DeclareOp>(*varDef) &&
+ "expected symbol to be mapped to hlfir.declare");
+ mlir::Value base = varDef->getBase();
+
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::acc::CacheOp cacheOp = createDataEntryOp<mlir::acc::CacheOp>(
- builder, operandLocation, baseAddr, asFortran, bounds,
- /*structured=*/false, /*implicit=*/false, dataClause,
- baseAddr.getType(),
+ builder, operandLocation, base, asFortran, bounds,
+ /*structured=*/false, /*implicit=*/false, dataClause, base.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
- /*unwrapBoxAddr=*/true, /*isPresent=*/mlir::Value{});
+ /*unwrapBoxAddr=*/false, /*isPresent=*/mlir::Value{});
- // Update symbol map so future lowering uses the cache result.
- Fortran::lower::SymMap &symbolMap = converter.getSymbolMap();
- std::optional<fir::FortranVariableOpInterface> hostDef =
- symbolMap.lookupVariableDefinition(symbol);
- assert(hostDef.has_value() && llvm::isa<hlfir::DeclareOp>(*hostDef) &&
- "expected symbol to be mapped to hlfir.declare");
- auto hostDeclare = llvm::cast<hlfir::DeclareOp>(*hostDef);
- // Clone the host declare with cache result as input.
- mlir::IRMapping mapper;
- mapper.map(hostDeclare.getMemref(), cacheOp.getAccVar());
- mlir::Operation *newDef =
- builder.clone(*hostDeclare.getOperation(), mapper);
- symbolMap.addVariableDefinition(
- symbol, llvm::cast<fir::FortranVariableOpInterface>(newDef));
+ // Use acc.cache directly as the variable definition.
+ converter.getSymbolMap().addVariableDefinition(
+ symbol, mlir::cast<fir::FortranVariableOpInterface>(
+ cacheOp.getOperation()));
}
}
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index d7e9ae4ec85b9..30aa64f0b0563 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -84,6 +84,13 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
PartialEntityAccessModel<hlfir::DeclareOp>>(*ctx);
});
+
+ // Register FortranVariableOpInterface for OpenACC cache operation
+ registry.addExtension(
+ +[](mlir::MLIRContext *ctx, mlir::acc::OpenACCDialect *dialect) {
+ mlir::acc::CacheOp::attachInterface<CacheFortranVariableModel>(*ctx);
+ });
+
registerAttrsExtensions(registry);
}
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index 10a36f22d1b14..eecc990ae150a 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -18,7 +18,7 @@ subroutine test_cache_basic()
! CHECK: acc.loop
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b"
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_basicEb"}
+! CHECK: hlfir.designate %[[CACHE]]
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_readonly()
@@ -35,7 +35,7 @@ subroutine test_cache_readonly()
! CHECK: acc.loop
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_cache_readonly>, name = "b"
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_readonlyEb"}
+! CHECK: hlfir.designate %[[CACHE]]
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_array_section()
@@ -61,7 +61,6 @@ subroutine test_cache_array_section()
! CHECK: %[[EXT:.*]] = arith.addi %[[TMP2]], %[[C1]] : index
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_array_sectionEb"}
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_multiple()
@@ -78,9 +77,7 @@ subroutine test_cache_multiple()
! CHECK: acc.loop
! CHECK: %[[CACHE_B:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b"
-! CHECK: hlfir.declare %[[CACHE_B]](%{{.*}}) {uniq_name = "_QFtest_cache_multipleEb"}
! CHECK: %[[CACHE_C:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "c"
-! CHECK: hlfir.declare %[[CACHE_C]](%{{.*}}) {uniq_name = "_QFtest_cache_multipleEc"}
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_2d_array()
@@ -114,7 +111,6 @@ subroutine test_cache_2d_array()
! CHECK: arith.addi
! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB2]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) startIdx(%{{.*}} : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND1]], %[[BOUND2]]) -> !fir.ref<!fir.array<10x10xf32>> {{{.*}}name = "b
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_2d_arrayEb"}
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_loop_var()
@@ -155,7 +151,6 @@ subroutine test_cache_loop_var()
! CHECK: %[[EXT:.*]] = arith.addi %[[DIFF]], %[[C1]] : index
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_loop_varEb"}
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_2d_loop_vars()
@@ -213,7 +208,6 @@ subroutine test_cache_2d_loop_vars()
! CHECK: %[[EXT2:.*]] = arith.addi %[[DIFF2]], %[[C1]] : index
! CHECK: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB2]] : index) extent(%[[EXT2]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10x10xf32>>) bounds(%[[BOUND1]], %[[BOUND2]]) -> !fir.ref<!fir.array<10x10xf32>> {{{.*}}name = "b
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_2d_loop_varsEb"}
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_single_element()
@@ -242,7 +236,6 @@ subroutine test_cache_single_element()
! CHECK: %[[LB:.*]] = arith.subi %[[I_IDX]], %[[C1]] : index
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_single_elementEb"}
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_mixed_bounds()
@@ -278,7 +271,6 @@ subroutine test_cache_mixed_bounds()
! CHECK: %[[EXT:.*]] = arith.addi %[[DIFF]], %[[C1]] : index
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[EXT]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_mixed_boundsEb"}
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_nonunit_lb()
@@ -302,8 +294,6 @@ subroutine test_cache_nonunit_lb()
! CHECK: %[[LB:.*]] = arith.subi %[[C15]], %[[C10]] : index
! Single element has extent = 1
! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) extent(%[[C1]] : index) stride(%[[C1]] : index) startIdx(%[[C10]] : index)
-! The varPtr uses the ref type (second result of hlfir.declare with shapeshift)
-! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<11xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<11xi32>> {{{.*}}name = "arr
-! The cloned declare produces a box and ref pair
-! CHECK: hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_nonunit_lbEarr"} : (!fir.ref<!fir.array<11xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<11xi32>>, !fir.ref<!fir.array<11xi32>>)
+! For non-unit lower bound arrays, acc.cache uses the box type from hlfir.declare
+! CHECK: %[[CACHE:.*]] = acc.cache var(%{{.*}} : !fir.box<!fir.array<11xi32>>) bounds(%[[BOUND]]) -> !fir.box<!fir.array<11xi32>> {{{.*}}name = "arr
end subroutine
>From 1ff41571f7ac4b7e83437552342448a1e32a3270 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 8 Jan 2026 09:25:25 -0800
Subject: [PATCH 10/13] format
---
flang/lib/Lower/OpenACC.cpp | 4 ++--
.../Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp | 1 -
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 52cd1c34fce6c..09bea1f353a27 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -5008,8 +5008,8 @@ genACC(Fortran::lower::AbstractConverter &converter,
// Use acc.cache directly as the variable definition.
converter.getSymbolMap().addVariableDefinition(
- symbol, mlir::cast<fir::FortranVariableOpInterface>(
- cacheOp.getOperation()));
+ symbol,
+ mlir::cast<fir::FortranVariableOpInterface>(cacheOp.getOperation()));
}
}
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index 30aa64f0b0563..6489ef302dd76 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -84,7 +84,6 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
PartialEntityAccessModel<hlfir::DeclareOp>>(*ctx);
});
-
// Register FortranVariableOpInterface for OpenACC cache operation
registry.addExtension(
+[](mlir::MLIRContext *ctx, mlir::acc::OpenACCDialect *dialect) {
>From 64317011a624115aa7fe4eb31a2b7073e686aa84 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 8 Jan 2026 10:52:52 -0800
Subject: [PATCH 11/13] Use DataClauseModifier instead of
DataClause::acc_cache_readonly
---
flang/lib/Lower/OpenACC.cpp | 20 ++++++++++++--------
flang/test/Lower/OpenACC/acc-cache.f90 | 2 +-
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 09bea1f353a27..a48537b644364 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -126,7 +126,9 @@ createDataEntryOp(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type retTy, llvm::ArrayRef<mlir::Value> async,
llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
- bool unwrapBoxAddr = false, mlir::Value isPresent = {}) {
+ bool unwrapBoxAddr = false, mlir::Value isPresent = {},
+ mlir::acc::DataClauseModifier modifiers =
+ mlir::acc::DataClauseModifier::none) {
mlir::Value varPtrPtr;
llvm::SmallVector<mlir::Value, 8> operands;
llvm::SmallVector<int32_t, 8> operandSegments;
@@ -156,6 +158,7 @@ createDataEntryOp(fir::FirOpBuilder &builder, mlir::Location loc,
op.setAsyncOperandsDeviceTypeAttr(builder.getArrayAttr(asyncDeviceTypes));
if (!asyncOnlyDeviceTypes.empty())
op.setAsyncOnlyAttr(builder.getArrayAttr(asyncOnlyDeviceTypes));
+ op.setModifiers(modifiers);
return op;
}
@@ -4974,11 +4977,9 @@ genACC(Fortran::lower::AbstractConverter &converter,
const auto &modifier =
std::get<std::optional<Fortran::parser::AccDataModifier>>(
objectListWithModifier.t);
- mlir::acc::DataClause dataClause =
- (modifier &&
- (*modifier).v == Fortran::parser::AccDataModifier::Modifier::ReadOnly)
- ? mlir::acc::DataClause::acc_cache_readonly
- : mlir::acc::DataClause::acc_cache;
+ bool isReadonly =
+ modifier &&
+ (*modifier).v == Fortran::parser::AccDataModifier::Modifier::ReadOnly;
Fortran::lower::StatementContext stmtCtx;
@@ -5002,9 +5003,12 @@ genACC(Fortran::lower::AbstractConverter &converter,
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::acc::CacheOp cacheOp = createDataEntryOp<mlir::acc::CacheOp>(
builder, operandLocation, base, asFortran, bounds,
- /*structured=*/false, /*implicit=*/false, dataClause, base.getType(),
+ /*structured=*/false, /*implicit=*/false,
+ mlir::acc::DataClause::acc_cache, base.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
- /*unwrapBoxAddr=*/false, /*isPresent=*/mlir::Value{});
+ /*unwrapBoxAddr=*/false, /*isPresent=*/mlir::Value{},
+ isReadonly ? mlir::acc::DataClauseModifier::readonly
+ : mlir::acc::DataClauseModifier::none);
// Use acc.cache directly as the variable definition.
converter.getSymbolMap().addVariableDefinition(
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index eecc990ae150a..0428744cdb999 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -34,7 +34,7 @@ subroutine test_cache_readonly()
end do
! CHECK: acc.loop
-! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_cache_readonly>, name = "b"
+! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {modifiers = #acc<data_clause_modifier readonly>, name = "b"
! CHECK: hlfir.designate %[[CACHE]]
end subroutine
>From 4bed19e22807d605b655b31e02f9765d02869cd8 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 8 Jan 2026 12:23:41 -0800
Subject: [PATCH 12/13] Revert the FortranVariableOpInterface setting for
acc.cache
---
.../OpenACC/Support/FIROpenACCOpsInterfaces.h | 20 -------------------
flang/lib/Lower/OpenACC.cpp | 7 +++----
.../Support/RegisterOpenACCExtensions.cpp | 6 ------
flang/test/Lower/OpenACC/acc-cache.f90 | 6 ++++--
4 files changed, 7 insertions(+), 32 deletions(-)
diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
index 94eeb9d06d334..b017cb4733b6c 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
@@ -13,7 +13,6 @@
#ifndef FLANG_OPTIMIZER_OPENACC_FIROPENACC_OPS_INTERFACES_H_
#define FLANG_OPTIMIZER_OPENACC_FIROPENACC_OPS_INTERFACES_H_
-#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
namespace fir {
@@ -87,25 +86,6 @@ struct OutlineRematerializationModel
: public mlir::acc::OutlineRematerializationOpInterface::ExternalModel<
OutlineRematerializationModel<Op>, Op> {};
-/// External model to implement FortranVariableOpInterface for acc::CacheOp.
-struct CacheFortranVariableModel
- : public fir::FortranVariableOpInterface::ExternalModel<
- CacheFortranVariableModel, mlir::acc::CacheOp> {
- mlir::Value getBase(mlir::Operation *op) const {
- return mlir::cast<mlir::acc::CacheOp>(op).getAccVar();
- }
- std::optional<fir::FortranVariableFlagsEnum>
- getFortranAttrs(mlir::Operation *) const {
- return std::nullopt;
- }
- mlir::Value getShape(mlir::Operation *) const { return nullptr; }
- mlir::OperandRange getExplicitTypeParams(mlir::Operation *op) const {
- return mlir::cast<mlir::acc::CacheOp>(op).getBounds().take_front(0);
- }
- void setFortranAttrs(mlir::Operation *, fir::FortranVariableFlagsEnum) const {
- }
-};
-
} // namespace fir::acc
#endif // FLANG_OPTIMIZER_OPENACC_FIROPENACC_OPS_INTERFACES_H_
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index a48537b644364..5b4512a60fa9b 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -5010,10 +5010,9 @@ genACC(Fortran::lower::AbstractConverter &converter,
isReadonly ? mlir::acc::DataClauseModifier::readonly
: mlir::acc::DataClauseModifier::none);
- // Use acc.cache directly as the variable definition.
- converter.getSymbolMap().addVariableDefinition(
- symbol,
- mlir::cast<fir::FortranVariableOpInterface>(cacheOp.getOperation()));
+ fir::ExtendedValue hostExv = converter.getSymbolExtendedValue(symbol);
+ fir::ExtendedValue cacheExv = fir::substBase(hostExv, cacheOp.getAccVar());
+ converter.bindSymbol(symbol, cacheExv);
}
}
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index 6489ef302dd76..d7e9ae4ec85b9 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -84,12 +84,6 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
PartialEntityAccessModel<hlfir::DeclareOp>>(*ctx);
});
- // Register FortranVariableOpInterface for OpenACC cache operation
- registry.addExtension(
- +[](mlir::MLIRContext *ctx, mlir::acc::OpenACCDialect *dialect) {
- mlir::acc::CacheOp::attachInterface<CacheFortranVariableModel>(*ctx);
- });
-
registerAttrsExtensions(registry);
}
diff --git a/flang/test/Lower/OpenACC/acc-cache.f90 b/flang/test/Lower/OpenACC/acc-cache.f90
index 0428744cdb999..f5072c5291a7f 100644
--- a/flang/test/Lower/OpenACC/acc-cache.f90
+++ b/flang/test/Lower/OpenACC/acc-cache.f90
@@ -18,7 +18,8 @@ subroutine test_cache_basic()
! CHECK: acc.loop
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {{{.*}}name = "b"
-! CHECK: hlfir.designate %[[CACHE]]
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_basicEb"}
+! CHECK: hlfir.designate %[[DECL]]#0
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_readonly()
@@ -35,7 +36,8 @@ subroutine test_cache_readonly()
! CHECK: acc.loop
! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) -> !fir.ref<!fir.array<10xf32>> {modifiers = #acc<data_clause_modifier readonly>, name = "b"
-! CHECK: hlfir.designate %[[CACHE]]
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CACHE]](%{{.*}}) {uniq_name = "_QFtest_cache_readonlyEb"}
+! CHECK: hlfir.designate %[[DECL]]#0
end subroutine
! CHECK-LABEL: func.func @_QPtest_cache_array_section()
>From 680a176cb180101852940be675f76836079df40b Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 8 Jan 2026 12:49:34 -0800
Subject: [PATCH 13/13] Remove braces
---
flang/lib/Lower/OpenACC.cpp | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5b4512a60fa9b..874c411fb7d62 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4911,32 +4911,29 @@ genCacheBounds(Fortran::lower::AbstractConverter &converter,
Fortran::semantics::MaybeExpr upperExpr =
Fortran::evaluate::AsGenericExpr(triplet->upper());
- if (!lowerExpr && !upperExpr) {
+ if (!lowerExpr && !upperExpr)
llvm::report_fatal_error("OpenACC cache directive requires at least "
"one bound to be specified for array section");
- }
// OpenACC cache only supports unit stride (default or explicit 1).
auto strideVal = Fortran::evaluate::ToInt64(triplet->stride());
- if (!strideVal || *strideVal != 1) {
+ if (!strideVal || *strideVal != 1)
llvm::report_fatal_error("OpenACC cache directive does not support "
"strided array sections");
- }
// Compute lower bound (use array lb if not specified).
mlir::Value lb = lowerExpr ? genIndex(lowerExpr) : arrayLb;
// Compute upper bound (use array ub if not specified).
mlir::Value ub;
- if (upperExpr) {
+ if (upperExpr)
ub = genIndex(upperExpr);
- } else {
+ else
// arr(lower:) - upper is array's upper bound
ub = mlir::arith::AddIOp::create(
builder, loc,
mlir::arith::SubIOp::create(builder, loc, arrayLb, one),
arrayExtent);
- }
// Normalize to zero-based and compute extent.
lbound = mlir::arith::SubIOp::create(builder, loc, lb, arrayLb);
More information about the flang-commits
mailing list