[flang-commits] [flang] [flang] avoid introducing iteration dependencies in WHERE and FORALL temporaries (PR #195053)
via flang-commits
flang-commits at lists.llvm.org
Thu Apr 30 07:15:14 PDT 2026
https://github.com/jeanPerier updated https://github.com/llvm/llvm-project/pull/195053
>From 9e9b9f9334b9879e86a21e5629e79ed0395d3e8d Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Thu, 30 Apr 2026 03:19:58 -0700
Subject: [PATCH 1/2] [flang] avoid introducing iteration dependencies in WHERE
temporaries
---
.../Optimizer/Builder/TemporaryStorage.h | 54 +++-
.../Optimizer/Builder/TemporaryStorage.cpp | 82 +++++
.../LowerHLFIROrderedAssignments.cpp | 92 +++++-
.../array-temp-many-forall.f90 | 45 +++
.../HLFIR/order_assignments/array-temp.fir | 112 +++++++
.../HLFIR/order_assignments/impure-where.fir | 16 +-
.../order_assignments/inlined-stack-temp.fir | 302 ++++++++----------
.../order_assignments/saving-mask-and-rhs.fir | 18 +-
.../user-defined-assignment.fir | 19 +-
9 files changed, 514 insertions(+), 226 deletions(-)
create mode 100644 flang/test/HLFIR/order_assignments/array-temp-many-forall.f90
create mode 100644 flang/test/HLFIR/order_assignments/array-temp.fir
diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
index cdb23a64c5c8a..e1edc5912ae97 100644
--- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
+++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
@@ -19,6 +19,7 @@
#ifndef FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H
#define FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H
+#include "flang/Common/idioms.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
namespace fir {
@@ -98,6 +99,34 @@ class HomogeneousScalarStack {
mlir::Value temp;
};
+/// Multidimensional temporary indexed directly by the enclosing loop induction
+/// variables (innermost loop is the first dimension). The indices passed to
+/// pushValue/fetch are interpreted in the array's domain, which is described
+/// by a fir.shape_shift built from the loop extents and lower bounds. This
+/// avoids the loop-carried counter used by HomogeneousScalarStack, keeping
+/// loop iterations independent. Limited to Fortran::common::maxRank dimensions.
+class ArrayTemp {
+public:
+ ArrayTemp(mlir::Location loc, fir::FirOpBuilder &builder,
+ fir::SequenceType declaredType, llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> lowerBounds,
+ llvm::ArrayRef<mlir::Value> lengths, bool allocateOnHeap,
+ llvm::StringRef name); // extents and lowerBounds need one entry per dimension of declaredType (asserted in the .cpp).
+
+ void pushValue(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::Value value, mlir::ValueRange indices); // Stores a scalar value into the element selected by indices.
+ void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder) {} // Intentionally empty: fetch() is driven by the indices it receives, not by a stored position.
+ mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::ValueRange indices); // Returns the value read back from the element selected by indices.
+ void destroy(mlir::Location loc, fir::FirOpBuilder &builder); // Frees the storage when it was heap allocated.
+ bool canBeFetchedAfterPush() const { return true; } // Always reports true.
+
+private:
+ const bool allocateOnHeap; // Fixed at construction; destroy() only emits a free when this is true.
+ mlir::Value temp; // Base result of the hlfir.declare created over the storage.
+ llvm::SmallVector<mlir::Value> typeParams; // Copy of the constructor's lengths, forwarded to every element designate.
+};
+
/// Structure to hold the value of a single entity.
class SimpleCopy {
public:
@@ -255,16 +284,26 @@ class TemporaryStorage {
TemporaryStorage(T &&impl) : impl{std::forward<T>(impl)} {}
void pushValue(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::Value value) {
- std::visit([&](auto &temp) { temp.pushValue(loc, builder, value); }, impl);
+ mlir::Value value, mlir::ValueRange indices = {}) {
+ // Only ArrayTemp uses the loop indices; other temps don't take them.
+ std::visit(Fortran::common::visitors{
+ [&](ArrayTemp &temp) {
+ temp.pushValue(loc, builder, value, indices);
+ },
+ [&](auto &temp) { temp.pushValue(loc, builder, value); }},
+ impl);
}
void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder) {
std::visit([&](auto &temp) { temp.resetFetchPosition(loc, builder); },
impl);
}
- mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) {
- return std::visit([&](auto &temp) { return temp.fetch(loc, builder); },
- impl);
+ mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::ValueRange indices = {}) {
+ return std::visit(
+ Fortran::common::visitors{
+ [&](ArrayTemp &temp) { return temp.fetch(loc, builder, indices); },
+ [&](auto &temp) { return temp.fetch(loc, builder); }},
+ impl);
}
void destroy(mlir::Location loc, fir::FirOpBuilder &builder) {
std::visit([&](auto &temp) { temp.destroy(loc, builder); }, impl);
@@ -282,8 +321,9 @@ class TemporaryStorage {
}
private:
- std::variant<HomogeneousScalarStack, SimpleCopy, SSARegister, AnyValueStack,
- AnyVariableStack, AnyVectorSubscriptStack, AnyAddressStack>
+ std::variant<HomogeneousScalarStack, ArrayTemp, SimpleCopy, SSARegister,
+ AnyValueStack, AnyVariableStack, AnyVectorSubscriptStack,
+ AnyAddressStack>
impl;
};
} // namespace fir::factory
diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
index 5db40aff91878..0233fc9f023de 100644
--- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
+++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
@@ -134,6 +134,88 @@ hlfir::Entity fir::factory::HomogeneousScalarStack::moveStackAsArrayExpr(
return hlfir::Entity{hlfirExpr};
}
+//===----------------------------------------------------------------------===//
+// fir::factory::ArrayTemp implementation.
+//===----------------------------------------------------------------------===//
+
+fir::factory::ArrayTemp::ArrayTemp(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ fir::SequenceType declaredType,
+ llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> lowerBounds,
+ llvm::ArrayRef<mlir::Value> lengths,
+ bool allocateOnHeap, llvm::StringRef name)
+ : allocateOnHeap{allocateOnHeap},
+ typeParams{lengths.begin(), lengths.end()} { // Keep our own copy of the lengths for later element designates.
+ assert(extents.size() == lowerBounds.size() &&
+ "extents and lowerBounds must have the same size");
+ assert(extents.size() == declaredType.getDimension() &&
+ "declared type rank must match the number of extents");
+ mlir::Value tempStorage; // Raw allocation, created by one of the two builder helpers below.
+ if (allocateOnHeap)
+ tempStorage =
+ builder.createHeapTemporary(loc, declaredType, name, extents, lengths);
+ else
+ tempStorage =
+ builder.createTemporary(loc, declaredType, name, extents, lengths);
+ // Declare the storage with a fir.shape_shift built from lowerBounds and
+ // extents, so the indices given to pushValue/fetch can address it directly.
+ mlir::Value shape = builder.genShape(loc, lowerBounds, extents);
+ temp =
+ hlfir::DeclareOp::create(builder, loc, tempStorage, name, shape, lengths)
+ .getBase(); // Only the declare's base result is kept; destroy() recovers the raw allocation from the declare op.
+}
+
+/// Generate an hlfir.designate on \p temp for the element at \p indices and
+/// return it, typed as a fir.ref to the temp's Fortran element type. Every index is first converted to the builder's index type, and \p typeParams is forwarded to the designate.
+/// The indices are interpreted in the temp's array domain (matching its lower bounds, which were set from the enclosing loop bounds).
+static mlir::Value genArrayTempElementAddr(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ mlir::Value temp,
+ mlir::ValueRange indices,
+ mlir::ValueRange typeParams) {
+ hlfir::Entity entity{temp};
+ mlir::Type refTy = fir::ReferenceType::get(entity.getFortranElementType());
+ mlir::Type idxTy = builder.getIndexType();
+ llvm::SmallVector<mlir::Value> idxs; // The incoming indices, each converted to idxTy.
+ idxs.reserve(indices.size());
+ for (mlir::Value idx : indices)
+ idxs.push_back(builder.createConvert(loc, idxTy, idx));
+ return hlfir::DesignateOp::create(builder, loc, refTy, temp, idxs,
+ typeParams);
+}
+
+void fir::factory::ArrayTemp::pushValue(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ mlir::Value value,
+ mlir::ValueRange indices) {
+ hlfir::Entity entity{value};
+ assert(entity.isScalar() && "cannot use ArrayTemp with array");
+ // Only values of intrinsic type are handled; anything else (e.g. derived types) is reported as a TODO below.
+ if (!entity.hasIntrinsicType())
+ TODO(loc, "creating ArrayTemp for derived types");
+ mlir::Value addr =
+ genArrayTempElementAddr(loc, builder, temp, indices, typeParams); // Address of the element selected by indices.
+ hlfir::AssignOp::create(builder, loc, value, addr); // Store the value into that element with an hlfir.assign.
+}
+
+mlir::Value fir::factory::ArrayTemp::fetch(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ mlir::ValueRange indices) {
+ mlir::Value addr =
+ genArrayTempElementAddr(loc, builder, temp, indices, typeParams); // Same addressing as pushValue, so equal indices select the same element.
+ return hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{addr}); // Hand back the element through hlfir::loadTrivialScalar rather than its address.
+}
+
+void fir::factory::ArrayTemp::destroy(mlir::Location loc,
+ fir::FirOpBuilder &builder) {
+ if (allocateOnHeap) { // Nothing is emitted for temps that were not heap allocated.
+ auto declare = temp.getDefiningOp<hlfir::DeclareOp>(); // temp is the base result of the declare made in the constructor.
+ assert(declare && "temp must have been declared");
+ fir::FreeMemOp::create(builder, loc, declare.getMemref()); // Free the declare's memref operand, i.e. the raw allocation.
+ }
+}
+
//===----------------------------------------------------------------------===//
// fir::factory::SimpleCopy implementation.
//===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index a3fd19d95fbbc..08a43e476c0cd 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "ScheduleOrderedAssignments.h"
+#include "flang/Common/Fortran-consts.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/TemporaryStorage.h"
@@ -257,6 +258,11 @@ class OrderedAssignmentRewriter {
bool currentLoopNestIterationNumberCanBeComputed(
llvm::SmallVectorImpl<fir::DoLoopOp> &loopNest);
+ /// Return the induction variables of the enclosing fir.do_loop nest at the
+ /// current insertion point, innermost first (same order as
+ /// currentLoopNestIterationNumberCanBeComputed). Used to index ArrayTemp.
+ llvm::SmallVector<mlir::Value> getLoopIndices();
+
template <typename T>
fir::factory::TemporaryStorage *insertSavedEntity(mlir::Region &region,
T &&temp) {
@@ -666,10 +672,12 @@ OrderedAssignmentRewriter::getIfSaved(mlir::Region &region) {
if (auto savedInSameRun = savedInCurrentRunBeforeUse.find(&region);
savedInSameRun != savedInCurrentRunBeforeUse.end())
return savedInSameRun->second;
- // If the region was saved in a previous run, fetch the saved value.
+ // If the region was saved in a previous run, fetch the saved value. The
+ // loop indices are only used by ArrayTemp; the other temps ignore them.
if (auto temp = savedEntities.find(&region); temp != savedEntities.end()) {
doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); });
- return ValueAndCleanUp{temp->second.fetch(loc, builder), std::nullopt};
+ return ValueAndCleanUp{temp->second.fetch(loc, builder, getLoopIndices()),
+ std::nullopt};
}
return std::nullopt;
}
@@ -1109,6 +1117,41 @@ computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder,
return loopExtent;
}
+/// Compute the extents and lower bounds of \p loopNest, in the same order as
+/// \p loopNest (innermost first), appending one entry per loop to \p extents and \p lowerBounds. NOTE(review): the extent is derived from the (lower, upper, step) triplet while the lower bound is the raw loop lower bound; confirm that a temp indexed directly by the induction variable stays in bounds when a loop step is not 1.
+static void computeLoopNestExtentsAndLowerBounds(
+ mlir::Location loc, fir::FirOpBuilder &builder,
+ llvm::ArrayRef<fir::DoLoopOp> loopNest,
+ llvm::SmallVectorImpl<mlir::Value> &extents,
+ llvm::SmallVectorImpl<mlir::Value> &lowerBounds) {
+ extents.reserve(extents.size() + loopNest.size()); // Results are appended, so existing entries are kept.
+ lowerBounds.reserve(lowerBounds.size() + loopNest.size());
+ for (fir::DoLoopOp doLoop : loopNest) {
+ mlir::Value extent = builder.genExtentFromTriplet(
+ loc, doLoop.getLowerBound(), doLoop.getUpperBound(), doLoop.getStep(),
+ builder.getIndexType()); // Extent is produced in the builder's index type.
+ extents.push_back(extent);
+ lowerBounds.push_back(doLoop.getLowerBound());
+ }
+}
+
+llvm::SmallVector<mlir::Value> OrderedAssignmentRewriter::getLoopIndices() {
+ llvm::SmallVector<mlir::Value> indices;
+ if (constructStack.empty()) // No enclosing construct: there is nothing to index with.
+ return indices;
+ mlir::Operation *outerLoop = constructStack[0]; // First construct on the stack; the walk stops once it has been visited.
+ mlir::Operation *currentConstruct = constructStack.back(); // Start from the last construct pushed on the stack.
+ while (currentConstruct) { // Walk up through parent operations until outerLoop has been handled.
+ if (auto doLoop = mlir::dyn_cast<fir::DoLoopOp>(currentConstruct))
+ indices.push_back(doLoop.getInductionVar()); // Only fir.do_loop ops contribute an index; other ops are skipped.
+ if (currentConstruct == outerLoop)
+ currentConstruct = nullptr; // Ends the walk.
+ else
+ currentConstruct = currentConstruct->getParentOp();
+ }
+ return indices;
+}
+
/// Return a name for temporary storage that indicates in which context
/// the temporary storage was created.
static llvm::StringRef
@@ -1160,20 +1203,35 @@ void OrderedAssignmentRewriter::generateSaveEntity(
bool loopShapeCanBePreComputed =
currentLoopNestIterationNumberCanBeComputed(loopNest);
doBeforeLoopNest([&] {
- /// For simple scalars inside loops whose total iteration number can be
- /// pre-computed, create a rank-1 array outside of the loops. It will be
- /// assigned/fetched inside the loops like a normal Fortran array given
- /// the iteration count.
+ // For simple scalars in a precomputable loop nest, prefer the
+ // multidimensional ArrayTemp (indexed by loop induction variables) so
+ // there is no loop-carried counter. Fall back to the 1D counter-based
+ // HomogeneousScalarStack when the nest is deeper than the maximum
+ // fir.array rank.
if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) {
- mlir::Value loopExtent =
- computeLoopNestIterationNumber(loc, builder, loopNest);
- auto sequenceType =
- mlir::cast<fir::SequenceType>(builder.getVarLenSeqTy(entityType));
- temp = insertSavedEntity(region,
- fir::factory::HomogeneousScalarStack{
- loc, builder, sequenceType, loopExtent,
- /*lenParams=*/{}, allocateOnHeap,
- /*stackThroughLoops=*/true, tempName});
+ if (loopNest.size() <= static_cast<size_t>(Fortran::common::maxRank)) {
+ llvm::SmallVector<mlir::Value> tempExtents;
+ llvm::SmallVector<mlir::Value> tempLowerBounds;
+ computeLoopNestExtentsAndLowerBounds(loc, builder, loopNest,
+ tempExtents, tempLowerBounds);
+ auto sequenceType = mlir::cast<fir::SequenceType>(
+ builder.getVarLenSeqTy(entityType, /*rank=*/loopNest.size()));
+ temp = insertSavedEntity(
+ region,
+ fir::factory::ArrayTemp{loc, builder, sequenceType, tempExtents,
+ tempLowerBounds, /*lengths=*/{},
+ allocateOnHeap, tempName});
+ } else {
+ mlir::Value loopExtent =
+ computeLoopNestIterationNumber(loc, builder, loopNest);
+ auto sequenceType =
+ mlir::cast<fir::SequenceType>(builder.getVarLenSeqTy(entityType));
+ temp = insertSavedEntity(region,
+ fir::factory::HomogeneousScalarStack{
+ loc, builder, sequenceType, loopExtent,
+ /*lenParams=*/{}, allocateOnHeap,
+ /*stackThroughLoops=*/true, tempName});
+ }
} else {
// If the number of iteration is not known, or if the values at each
@@ -1185,8 +1243,8 @@ void OrderedAssignmentRewriter::generateSaveEntity(
}
});
// Inside the loop nest (and any fir.if if there are active masks), copy
- // the value to the temp and do clean-ups for the value if any.
- temp->pushValue(loc, builder, entity);
+ // the value to the temp and do clean-ups of the value if any.
+ temp->pushValue(loc, builder, entity, getLoopIndices());
}
// Delay the clean-up if the entity will be used in the same run (i.e., the
diff --git a/flang/test/HLFIR/order_assignments/array-temp-many-forall.f90 b/flang/test/HLFIR/order_assignments/array-temp-many-forall.f90
new file mode 100644
index 0000000000000..0078cf4e5a446
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/array-temp-many-forall.f90
@@ -0,0 +1,45 @@
+! Test that the lower-hlfir-ordered-assignments pass falls back to the
+! 1D HomogeneousScalarStack temporary (counter-based) when the FORALL loop
+! nest is deeper than Fortran::common::maxRank (15), because fir.array can
+! only hold up to maxRank dimensions.
+!
+! Below maxRank, the new ArrayTemp is used and there is no counter; here we
+! verify the opposite: the counter (a fir.alloca index, fir.load/addi/store
+! pattern) is restored when the loop nest has 16 levels.
+!
+! The test uses a rank-8 array of derived type with a rank-8 array component
+! to spread 16 indexable dimensions across the FORALL header.
+!
+! RUN: bbc -emit-hlfir -o - %s | fir-opt --lower-hlfir-ordered-assignments | FileCheck %s
+
+module many_forall_mod
+ type :: t
+ real :: c(2,2,2,2,2,2,2,2)
+ end type
+contains
+ subroutine more_than_15_forall(a)
+ type(t), intent(inout) :: a(2,2,2,2,2,2,2,2)
+ forall (i1=1:2, i2=1:2, i3=1:2, i4=1:2, i5=1:2, i6=1:2, i7=1:2, i8=1:2, &
+ j1=1:2, j2=1:2, j3=1:2, j4=1:2, j5=1:2, j6=1:2, j7=1:2, j8=1:2)
+ a(i1,i2,i3,i4,i5,i6,i7,i8)%c(j1,j2,j3,j4,j5,j6,j7,j8) = &
+ a(3-i1,3-i2,3-i3,3-i4,3-i5,3-i6,3-i7,3-i8)%c(3-j1,3-j2,3-j3,3-j4,3-j5,3-j6,3-j7,3-j8)
+ end forall
+ end subroutine
+end module
+! With 16 nested loops, the temporary must be the 1D counter-based form
+! (HomogeneousScalarStack) instead of a 16D ArrayTemp, since fir.array is
+! limited to Fortran::common::maxRank dimensions.
+!
+! CHECK-LABEL: func.func @_QMmany_forall_modPmore_than_15_forall(
+! There must be a counter in memory (fir.alloca index).
+! CHECK: %[[CTR:.*]] = fir.alloca index
+! The temporary is a 1D fir.array<?xf32>.
+! CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+! Plain fir.shape (no shift), since the temp is indexed by the counter.
+! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+! CHECK: hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.heap<!fir.array<?xf32>>)
+! Inside the loop nest the counter is incremented and the temp is indexed
+! through the counter (not directly through the loop induction variables).
+! CHECK: fir.load %[[CTR]] : !fir.ref<index>
+! CHECK: arith.addi %{{.*}}, %{{.*}} : index
+! CHECK: fir.store %{{.*}} to %[[CTR]] : !fir.ref<index>
diff --git a/flang/test/HLFIR/order_assignments/array-temp.fir b/flang/test/HLFIR/order_assignments/array-temp.fir
new file mode 100644
index 0000000000000..5510d0220397a
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/array-temp.fir
@@ -0,0 +1,112 @@
+// Test that hlfir.where/hlfir.forall temporary storages used for simple
+// scalar values are lowered to a multidimensional ArrayTemp directly indexed
+// by the enclosing loop induction variables (using hlfir.designate on a
+// fir.shape_shift) rather than to a 1D HomogeneousScalarStack with a counter.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+// Single-dimension case: the saved RHS is stored in a 1D ArrayTemp indexed
+// by the where loop induction variable, with a fir.shape_shift instead of a
+// fir.shape.
+func.func @where_self_overlap(%x: !fir.ref<!fir.array<10xi32>>, %mask: !fir.ref<!fir.array<10x!fir.logical<4>>>) {
+ %c-1 = arith.constant -1 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ hlfir.where {
+ hlfir.yield %mask : !fir.ref<!fir.array<10x!fir.logical<4>>>
+ } do {
+ hlfir.region_assign {
+ %2 = hlfir.designate %x (%c10:%c1:%c-1) shape %1 :
+(!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+ hlfir.yield %2 : !fir.ref<!fir.array<10xi32>>
+ } to {
+ hlfir.yield %x : !fir.ref<!fir.array<10xi32>>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @where_self_overlap(
+// CHECK-NOT: fir.alloca index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.where", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.where"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// First loop: save the RHS values, addressed by the loop induction variable.
+// CHECK: fir.do_loop %[[IV0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.if %{{.*}} {
+// CHECK-NOT: fir.load %{{.*}} : !fir.ref<index>
+// CHECK: %[[ADDR0:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV0]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %{{.*}} to %[[ADDR0]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// Second loop: read back from the temp using the new induction variable.
+// CHECK: fir.do_loop %[[IV1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.if %{{.*}} {
+// CHECK: %[[ADDR1:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV1]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL:.*]] = fir.load %[[ADDR1]] : !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL]] to %{{.*}} : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: return
+
+// Two nested forall loops with non-trivial lower bounds: the temp is a 2D
+// ArrayTemp whose first dimension corresponds to the inner forall and second
+// dimension to the outer one. The fir.shape_shift carries the forall lower
+// bounds, and hlfir.designate uses both induction variables (innermost first).
+func.func @nested_forall_2d(%arr: !fir.box<!fir.array<?x?xi32>>) {
+ %c2_i32 = arith.constant 2 : i32
+ %c5_i32 = arith.constant 5 : i32
+ %c3_i32 = arith.constant 3 : i32
+ %c7_i32 = arith.constant 7 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0:2 = hlfir.declare %arr {uniq_name = "x"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c2_i32 : i32
+ } ub {
+ hlfir.yield %c5_i32 : i32
+ } (%i: i32) {
+ hlfir.forall lb {
+ hlfir.yield %c3_i32 : i32
+ } ub {
+ hlfir.yield %c7_i32 : i32
+ } (%j: i32) {
+ hlfir.region_assign {
+ %i_idx = fir.convert %i : (i32) -> i64
+ %j_idx = fir.convert %j : (i32) -> i64
+ %addr = hlfir.designate %0#0 (%i_idx, %j_idx) : (!fir.box<!fir.array<?x?xi32>>, i64, i64) -> !fir.ref<i32>
+ %val = fir.load %addr : !fir.ref<i32>
+ hlfir.yield %val : i32
+ } to {
+ %i_inv = arith.subi %c5_i32, %i : i32
+ %i_inv2 = arith.addi %i_inv, %c2_i32 : i32
+ %j_inv = arith.subi %c7_i32, %j : i32
+ %j_inv2 = arith.addi %j_inv, %c3_i32 : i32
+ %i_idx = fir.convert %i_inv2 : (i32) -> i64
+ %j_idx = fir.convert %j_inv2 : (i32) -> i64
+ %addr = hlfir.designate %0#0 (%i_idx, %j_idx) : (!fir.box<!fir.array<?x?xi32>>, i64, i64) -> !fir.ref<i32>
+ hlfir.yield %addr : !fir.ref<i32>
+ }
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @nested_forall_2d(
+// CHECK-NOT: fir.alloca index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?x?xi32>, %{{.*}}, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (index, index, index, index) -> !fir.shapeshift<2>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?x?xi32>>, !fir.shapeshift<2>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.heap<!fir.array<?x?xi32>>)
+// CHECK: fir.do_loop %[[I0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.do_loop %[[J0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[ADDR0:.*]] = hlfir.designate %[[DECL]]#0 (%[[J0]], %[[I0]]) : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %{{.*}} to %[[ADDR0]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: fir.do_loop %[[I1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.do_loop %[[J1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[ADDR1:.*]] = hlfir.designate %[[DECL]]#0 (%[[J1]], %[[I1]]) : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
+// CHECK: %[[V:.*]] = fir.load %[[ADDR1]] : !fir.ref<i32>
+// CHECK: hlfir.assign %[[V]] to %{{.*}} : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?x?xi32>>
+// CHECK: return
diff --git a/flang/test/HLFIR/order_assignments/impure-where.fir b/flang/test/HLFIR/order_assignments/impure-where.fir
index 011a486b2baf7..c26b007e1973f 100644
--- a/flang/test/HLFIR/order_assignments/impure-where.fir
+++ b/flang/test/HLFIR/order_assignments/impure-where.fir
@@ -38,22 +38,22 @@ func.func @test_elsewhere_impure_mask(%x: !fir.ref<!fir.array<10xi32>>, %y: !fir
// CHECK-LABEL: func.func @test_elsewhere_impure_mask(
// CHECK: %[[VAL_12:.*]] = fir.call @impure() : () -> !fir.heap<!fir.array<10x!fir.logical<4>>>
// CHECK: %[[VAL_21:.*]] = fir.allocmem !fir.array<?x!fir.logical<4>>, %[[extent:[^ ]*]]
-// CHECK: %[[VAL_22:.*]] = fir.shape %[[extent]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%{{.*}}) {uniq_name = ".tmp.where"}
-// CHECK: fir.do_loop
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %{{.*}}, %[[extent]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%[[SHAPE]]) {uniq_name = ".tmp.where"}
+// CHECK: fir.do_loop %[[IV0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: fir.if {{.*}} {
// CHECK: } else {
// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]] (%{{.*}})
// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[IV0]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
// CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_32]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
// CHECK: }
// CHECK: }
// CHECK-NOT: fir.call @impure
-// CHECK: fir.do_loop
+// CHECK: fir.do_loop %[[IV1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: fir.if {{.*}} {
// CHECK: } else {
-// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[IV1]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
// CHECK: fir.if %[[VAL_44]] {
@@ -61,10 +61,10 @@ func.func @test_elsewhere_impure_mask(%x: !fir.ref<!fir.array<10xi32>>, %y: !fir
// CHECK: }
// CHECK: }
// CHECK-NOT: fir.call @impure
-// CHECK: fir.do_loop
+// CHECK: fir.do_loop %[[IV2:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: fir.if {{.*}} {
// CHECK: } else {
-// CHECK: %[[VAL_52:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_52:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[IV2]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_53:.*]] = fir.load %[[VAL_52]] : !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (!fir.logical<4>) -> i1
// CHECK: fir.if %[[VAL_54]] {
diff --git a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
index 064b12b9ed812..6eac74e23053e 100644
--- a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
+++ b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
@@ -27,53 +27,42 @@ func.func @test_scalar_save(%arg0: !fir.box<!fir.array<?xi32>>) {
}
// CHECK-LABEL: func.func @test_scalar_save(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
-// CHECK: %[[VAL_1:.*]] = fir.alloca index
-// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i32
-// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
-// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
-// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
-// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index
-// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index
-// CHECK: %[[VAL_11:.*]] = arith.divsi %[[VAL_10]], %[[VAL_7]] : index
-// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_8]] : index
-// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_8]] : index
-// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_15:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_16:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_13]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]](%[[VAL_17]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
-// CHECK: fir.do_loop %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_7]] {
-// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i32
-// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
-// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_21]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
-// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_15]] : index
-// CHECK: fir.store %[[VAL_25]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_24]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_26]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_1:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_1]] : (i32) -> index
+// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_5]], %[[VAL_4]] : index
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_6]] : index
+// CHECK: %[[VAL_10:.*]] = arith.divsi %[[VAL_9]], %[[VAL_6]] : index
+// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_7]] : index
+// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_7]] : index
+// CHECK: %[[VAL_13:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_12]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[VAL_14:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_12]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]](%[[VAL_14]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_16:.*]] = %[[VAL_4]] to %[[VAL_5]] step %[[VAL_6]] {
+// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (index) -> i32
+// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> i64
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_18]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<i32>
+// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_21]] : i32, !fir.ref<i32>
// CHECK: }
-// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
-// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
-// CHECK: %[[VAL_29:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_27]] to %[[VAL_28]] step %[[VAL_29]] {
-// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i32
-// CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_15]] : index
-// CHECK: fir.store %[[VAL_33]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_32]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
-// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_31]], %[[VAL_3]] : i32
-// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> i64
-// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_37]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_38]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_1]] : (i32) -> index
+// CHECK: %[[VAL_24:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_22]] to %[[VAL_23]] step %[[VAL_24]] {
+// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (index) -> i32
+// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_25]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
+// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_26]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> i64
+// CHECK: %[[VAL_31:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_30]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_31]] : i32, !fir.ref<i32>
// CHECK: }
-// CHECK: fir.freemem %[[VAL_16]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: fir.freemem %[[VAL_13]] : !fir.heap<!fir.array<?xi32>>
// CHECK: return
// CHECK: }
@@ -111,87 +100,65 @@ func.func @mask_and_rhs_conflict(%arg0: !fir.box<!fir.array<?xi32>>) {
}
// CHECK-LABEL: func.func @mask_and_rhs_conflict(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
-// CHECK: %[[VAL_1:.*]] = fir.alloca index
-// CHECK: %[[VAL_2:.*]] = fir.alloca index
-// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32
-// CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32
-// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
-// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
-// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
-// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_10:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index
-// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_9]] : index
-// CHECK: %[[VAL_13:.*]] = arith.divsi %[[VAL_12]], %[[VAL_9]] : index
-// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_10]] : index
-// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_13]], %[[VAL_10]] : index
-// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array<?xi1>, %[[VAL_15]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_19]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
-// CHECK: %[[VAL_21:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index
-// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_22]], %[[VAL_9]] : index
-// CHECK: %[[VAL_24:.*]] = arith.divsi %[[VAL_23]], %[[VAL_9]] : index
-// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_24]], %[[VAL_21]] : index
-// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_24]], %[[VAL_21]] : index
-// CHECK: %[[VAL_27:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_28:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_26]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_30:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%[[VAL_30]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
-// CHECK: fir.do_loop %[[VAL_32:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_9]] {
-// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i32
-// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64
-// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_34]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<i32>
-// CHECK: %[[VAL_37:.*]] = arith.cmpi sgt, %[[VAL_36]], %[[VAL_3]] : i32
-// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_17]] : index
-// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
-// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_40]] : i1, !fir.ref<i1>
-// CHECK: fir.if %[[VAL_37]] {
-// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64
-// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_41]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<i32>
-// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_44]], %[[VAL_28]] : index
-// CHECK: fir.store %[[VAL_45]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_44]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_46]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_1:.*]] = arith.constant 42 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
+// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index
+// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index
+// CHECK: %[[VAL_11:.*]] = arith.divsi %[[VAL_10]], %[[VAL_7]] : index
+// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[MASK_TEMP:.*]] = fir.allocmem !fir.array<?xi1>, %[[VAL_13]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[MASK_SHAPE:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_13]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[MASK_DECL:.*]]:2 = hlfir.declare %[[MASK_TEMP]](%[[MASK_SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
+// CHECK: %[[VAL_16:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_17:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index
+// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_17]], %[[VAL_7]] : index
+// CHECK: %[[VAL_19:.*]] = arith.divsi %[[VAL_18]], %[[VAL_7]] : index
+// CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_16]] : index
+// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_19]], %[[VAL_16]] : index
+// CHECK: %[[RHS_TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_21]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[RHS_SHAPE:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_21]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[RHS_DECL:.*]]:2 = hlfir.declare %[[RHS_TEMP]](%[[RHS_SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_7]] {
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (index) -> i32
+// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i32) -> i64
+// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_26]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
+// CHECK: %[[VAL_29:.*]] = arith.cmpi sgt, %[[VAL_28]], %[[VAL_1]] : i32
+// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[MASK_DECL]]#0 (%[[VAL_24]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_30]] : i1, !fir.ref<i1>
+// CHECK: fir.if %[[VAL_29]] {
+// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_25]] : (i32) -> i64
+// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_31]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[RHS_DECL]]#0 (%[[VAL_24]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_33]] to %[[VAL_34]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
-// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
-// CHECK: %[[VAL_49:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: fir.do_loop %[[VAL_50:.*]] = %[[VAL_47]] to %[[VAL_48]] step %[[VAL_49]] {
-// CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (index) -> i32
-// CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_17]] : index
-// CHECK: fir.store %[[VAL_53]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_54:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_52]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
-// CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_54]] : !fir.ref<i1>
-// CHECK: fir.if %[[VAL_55]] {
-// CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_56]], %[[VAL_28]] : index
-// CHECK: fir.store %[[VAL_57]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_56]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_59:.*]] = fir.load %[[VAL_58]] : !fir.ref<i32>
-// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_51]], %[[VAL_5]] : i32
-// CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (i32) -> i64
-// CHECK: %[[VAL_62:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_61]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_59]] to %[[VAL_62]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
+// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_37:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_38:.*]] = %[[VAL_35]] to %[[VAL_36]] step %[[VAL_37]] {
+// CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i32
+// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[MASK_DECL]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<i1>
+// CHECK: fir.if %[[VAL_41]] {
+// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[RHS_DECL]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<i32>
+// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_39]], %[[VAL_3]] : i32
+// CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_45]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_46]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK-DAG: fir.freemem %[[VAL_18]] : !fir.heap<!fir.array<?xi1>>
-// CHECK-DAG: fir.freemem %[[VAL_29]] : !fir.heap<!fir.array<?xi32>>
+// CHECK-DAG: fir.freemem %[[MASK_TEMP]] : !fir.heap<!fir.array<?xi1>>
+// CHECK-DAG: fir.freemem %[[RHS_TEMP]] : !fir.heap<!fir.array<?xi32>>
// CHECK: return
// CHECK: }
@@ -224,6 +191,8 @@ func.func @test_where_mask_save(%arg0: !fir.box<!fir.array<?xi32>>) {
}
return
}
+// This case uses hlfir.associate (SimpleCopy) to save the whole mask outside
+// of the where loop nest, which is unrelated to the new ArrayTemp path.
// CHECK-LABEL: func.func @test_where_mask_save(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
@@ -278,59 +247,48 @@ func.func @test_where_rhs_save(%x: !fir.ref<!fir.array<10xi32>>, %mask: !fir.ref
// CHECK-LABEL: func.func @test_where_rhs_save(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>,
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>>) {
-// CHECK: %[[VAL_2:.*]] = fir.alloca index
-// CHECK: %[[VAL_3:.*]] = arith.constant -1 : index
-// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = arith.constant 10 : index
-// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_7:.*]] = arith.constant 10 : index
-// CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_11:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_7]], %[[VAL_9]] : index
-// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_9]] : index
-// CHECK: %[[VAL_14:.*]] = arith.divsi %[[VAL_13]], %[[VAL_9]] : index
-// CHECK: %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_11]] : index
-// CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_11]] : index
-// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_19:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_16]] {bindc_name = ".tmp.where", uniq_name = ""}
-// CHECK: %[[VAL_20:.*]] = fir.shape %[[VAL_16]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_19]](%[[VAL_20]]) {uniq_name = ".tmp.where"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
-// CHECK: fir.do_loop %[[VAL_22:.*]] = %[[VAL_9]] to %[[VAL_7]] step %[[VAL_9]] {
-// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_22]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.logical<4>) -> i1
-// CHECK: fir.if %[[VAL_25]] {
-// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_5]]:%[[VAL_4]]:%[[VAL_3]]) shape %[[VAL_6]] : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
-// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_22]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<i32>
-// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_18]] : index
-// CHECK: fir.store %[[VAL_29]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_28]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_30]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_2:.*]] = arith.constant -1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_6:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_10:.*]] = arith.subi %[[VAL_6]], %[[VAL_8]] : index
+// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_8]] : index
+// CHECK: %[[VAL_12:.*]] = arith.divsi %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_9]] : index
+// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_9]] : index
+// CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_14]] {bindc_name = ".tmp.where", uniq_name = ""}
+// CHECK: %[[VAL_16:.*]] = fir.shape_shift %[[VAL_8]], %[[VAL_14]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]](%[[VAL_16]]) {uniq_name = ".tmp.where"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_8]] {
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_18]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_21]] {
+// CHECK: %[[SLICE:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_4]]:%[[VAL_3]]:%[[VAL_2]]) shape %[[VAL_5]] : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[SLICE]] (%[[VAL_18]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+// CHECK: %[[VAL_24:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_18]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_24]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_31:.*]] = arith.constant 10 : index
-// CHECK: %[[VAL_32:.*]] = fir.shape %[[VAL_31]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_33:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_33]] to %[[VAL_31]] step %[[VAL_33]] {
-// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_34]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
-// CHECK: fir.if %[[VAL_37]] {
-// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_18]] : index
-// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<i32>
-// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_41]] to %[[VAL_42]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_25:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_26:.*]] = fir.shape %[[VAL_25]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_27:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_28:.*]] = %[[VAL_27]] to %[[VAL_25]] step %[[VAL_27]] {
+// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_28]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_31]] {
+// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_28]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_28]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_33]] to %[[VAL_34]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK: fir.freemem %[[VAL_19]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: fir.freemem %[[VAL_15]] : !fir.heap<!fir.array<?xi32>>
// CHECK: return
// CHECK: }
diff --git a/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir b/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir
index 1eb86d09a39c1..3fe6ab7e087ad 100644
--- a/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir
+++ b/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir
@@ -42,13 +42,14 @@ func.func @saving_mask_and_rhs(%arg0: !fir.ref<!fir.array<10xi32>>) {
// Creating RHS temporary using the mask temporary (and not the hlfir.elemental)
// CHECK: %[[VAL_25:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.where", uniq_name = ""}
-// CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_25]]({{.*}}) {uniq_name = ".tmp.where"}
-// CHECK: fir.do_loop
+// CHECK: %[[RHS_SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_25]](%[[RHS_SHAPE]]) {uniq_name = ".tmp.where"}
+// CHECK: fir.do_loop %[[IV:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_14]]#0 ({{.*}})
// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
// CHECK: fir.if %[[VAL_31]] {
-// CHECK: %[[VAL_36:.*]] = hlfir.designate %[[VAL_27]]#0 ({{.*}})
+// CHECK: %[[VAL_36:.*]] = hlfir.designate %[[VAL_27]]#0 (%[[IV]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %{{.*}} to %[[VAL_36]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
@@ -91,14 +92,17 @@ func.func @forall_mask_and_rhs(%arg0: !fir.ref<!fir.array<10xi32>>) {
// CHECK-LABEL: func.func @forall_mask_and_rhs(
// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array<?xi1>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%{{.*}}) {uniq_name = ".tmp.forall"}
+// CHECK: %[[MASK_SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[MASK_SHAPE]]) {uniq_name = ".tmp.forall"}
// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%{{.*}}) {uniq_name = ".tmp.forall"}
+// CHECK: %[[RHS_SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%[[RHS_SHAPE]]) {uniq_name = ".tmp.forall"}
+// CHECK: fir.do_loop %[[IV:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[VAL_36:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32
-// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_20]]#0 (%{{.*}})
+// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[IV]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
// CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_39]] : i1, !fir.ref<i1>
// CHECK: fir.if %[[VAL_36]] {
-// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_31]]#0 (%{{.*}})
+// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[IV]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %{{.*}} to %[[VAL_45]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
diff --git a/flang/test/HLFIR/order_assignments/user-defined-assignment.fir b/flang/test/HLFIR/order_assignments/user-defined-assignment.fir
index e64b3ef362cee..83411279d0731 100644
--- a/flang/test/HLFIR/order_assignments/user-defined-assignment.fir
+++ b/flang/test/HLFIR/order_assignments/user-defined-assignment.fir
@@ -138,7 +138,6 @@ func.func @test_scalar_forall_overlap(%i: !fir.ref<!fir.array<10xi32>>) {
}
// CHECK-LABEL: func.func @test_scalar_forall_overlap(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>) {
-// CHECK: %[[VAL_1:.*]] = fir.alloca index
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index
@@ -150,30 +149,20 @@ func.func @test_scalar_forall_overlap(%i: !fir.ref<!fir.array<10xi32>>) {
// CHECK: %[[VAL_10:.*]] = arith.divsi %[[VAL_9]], %[[VAL_6]] : index
// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_7]] : index
// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_7]] : index
-// CHECK: %[[VAL_13:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_13]] to %[[VAL_1]] : !fir.ref<index>
// CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?xi1>, %[[VAL_12]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]](%[[VAL_16]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
+// CHECK: %[[VAL_16:.*]] = fir.shape_shift %[[VAL_3]], %[[VAL_12]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]](%[[VAL_16]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_5]], %[[VAL_18]] : index
// CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<i32>
// CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_21]], %[[VAL_2]] : i32
-// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_14]] : index
-// CHECK: fir.store %[[VAL_24]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_25:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_23]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: %[[VAL_25:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_18]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
// CHECK: hlfir.assign %[[VAL_22]] to %[[VAL_25]] : i1, !fir.ref<i1>
// CHECK: }
// CHECK: %[[VAL_26:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_13]] to %[[VAL_1]] : !fir.ref<index>
// CHECK: fir.do_loop %[[VAL_27:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_26]] {
-// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_14]] : index
-// CHECK: fir.store %[[VAL_29]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_28]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_27]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
// CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_30]] : !fir.ref<i1>
// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_27]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_31]] : (i1) -> !fir.logical<4>
>From 5ae60ab61a93cf2ae18046a98c349eccdbdad9ba Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Thu, 30 Apr 2026 07:13:34 -0700
Subject: [PATCH 2/2] do not use new temps with forall with non unit strides
---
.../LowerHLFIROrderedAssignments.cpp | 85 ++++++++++-------
.../HLFIR/order_assignments/array-temp.fir | 95 +++++++++++++++++++
2 files changed, 147 insertions(+), 33 deletions(-)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 08a43e476c0cd..5ad69c2b5cafe 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -260,7 +260,7 @@ class OrderedAssignmentRewriter {
/// Return the induction variables of the enclosing fir.do_loop nest at the
/// current insertion point, innermost first (same order as
- /// currentLoopNestIterationNumberCanBeComputed). Used to index ArrayTemp.
+ /// currentLoopNestIterationNumberCanBeComputed).
llvm::SmallVector<mlir::Value> getLoopIndices();
template <typename T>
@@ -672,8 +672,7 @@ OrderedAssignmentRewriter::getIfSaved(mlir::Region &region) {
if (auto savedInSameRun = savedInCurrentRunBeforeUse.find(&region);
savedInSameRun != savedInCurrentRunBeforeUse.end())
return savedInSameRun->second;
- // If the region was saved in a previous run, fetch the saved value. The
- // loop indices are only used by ArrayTemp; the other temps ignore them.
+ // If the region was saved in a previous run, fetch the saved value.
if (auto temp = savedEntities.find(&region); temp != savedEntities.end()) {
doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); });
return ValueAndCleanUp{temp->second.fetch(loc, builder, getLoopIndices()),
@@ -1117,22 +1116,42 @@ computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder,
return loopExtent;
}
+/// If \p value is a compile-time integer constant (possibly hidden behind
+/// fir.convert ops), return its value. Otherwise return std::nullopt.
+static std::optional<int64_t> unwrapConstantInt(mlir::Value value) {
+ while (auto convert = value.getDefiningOp<fir::ConvertOp>())
+ value = convert.getValue();
+ return fir::getIntIfConstant(value);
+}
+
/// Compute the extents and lower bounds of \p loopNest, in the same order as
-/// \p loopNest (innermost first).
-static void computeLoopNestExtentsAndLowerBounds(
+/// \p loopNest (innermost first). The lower bound of each dimension is the
+/// smallest induction variable value, so that the loop induction variable
+/// can directly index the temp via fir.shape_shift. This only works when
+/// every loop has a unit step: for step +1 the smallest iv is the loop's
+/// lower bound; for step -1 it is the loop's upper bound. Returns false
+/// (with \p extents and \p lowerBounds left in an unspecified state) when
+/// any loop has a non-unit or non-constant step, signalling that the caller
+/// should fall back to a counter-based temp.
+static bool computeLoopNestExtentsAndLowerBounds(
mlir::Location loc, fir::FirOpBuilder &builder,
llvm::ArrayRef<fir::DoLoopOp> loopNest,
llvm::SmallVectorImpl<mlir::Value> &extents,
llvm::SmallVectorImpl<mlir::Value> &lowerBounds) {
- extents.reserve(extents.size() + loopNest.size());
- lowerBounds.reserve(lowerBounds.size() + loopNest.size());
+ extents.reserve(loopNest.size());
+ lowerBounds.reserve(loopNest.size());
for (fir::DoLoopOp doLoop : loopNest) {
+ auto step = unwrapConstantInt(doLoop.getStep());
+ if (!step || std::abs(*step) != 1)
+ return false;
mlir::Value extent = builder.genExtentFromTriplet(
loc, doLoop.getLowerBound(), doLoop.getUpperBound(), doLoop.getStep(),
builder.getIndexType());
extents.push_back(extent);
- lowerBounds.push_back(doLoop.getLowerBound());
+ lowerBounds.push_back(*step == 1 ? doLoop.getLowerBound()
+ : doLoop.getUpperBound());
}
+ return true;
}
llvm::SmallVector<mlir::Value> OrderedAssignmentRewriter::getLoopIndices() {
@@ -1207,32 +1226,32 @@ void OrderedAssignmentRewriter::generateSaveEntity(
// multidimensional ArrayTemp (indexed by loop induction variables) so
// there is no loop-carried counter. Fall back to the 1D counter-based
// HomogeneousScalarStack when the nest is deeper than the maximum
- // fir.array rank.
- if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) {
- if (loopNest.size() <= static_cast<size_t>(Fortran::common::maxRank)) {
- llvm::SmallVector<mlir::Value> tempExtents;
- llvm::SmallVector<mlir::Value> tempLowerBounds;
+ // fir.array rank or when any loop has a non-unit/non-constant step
+ // (in which case the loop induction variable cannot index the temp
+ // directly).
+ llvm::SmallVector<mlir::Value> tempExtents;
+ llvm::SmallVector<mlir::Value> tempLowerBounds;
+ if (loopShapeCanBePreComputed && fir::isa_trivial(entityType) &&
+ loopNest.size() <= static_cast<size_t>(Fortran::common::maxRank) &&
computeLoopNestExtentsAndLowerBounds(loc, builder, loopNest,
- tempExtents, tempLowerBounds);
- auto sequenceType = mlir::cast<fir::SequenceType>(
- builder.getVarLenSeqTy(entityType, /*rank=*/loopNest.size()));
- temp = insertSavedEntity(
- region,
- fir::factory::ArrayTemp{loc, builder, sequenceType, tempExtents,
- tempLowerBounds, /*lengths=*/{},
- allocateOnHeap, tempName});
- } else {
- mlir::Value loopExtent =
- computeLoopNestIterationNumber(loc, builder, loopNest);
- auto sequenceType =
- mlir::cast<fir::SequenceType>(builder.getVarLenSeqTy(entityType));
- temp = insertSavedEntity(region,
- fir::factory::HomogeneousScalarStack{
- loc, builder, sequenceType, loopExtent,
- /*lenParams=*/{}, allocateOnHeap,
- /*stackThroughLoops=*/true, tempName});
- }
-
+ tempExtents, tempLowerBounds)) {
+ auto sequenceType = mlir::cast<fir::SequenceType>(
+ builder.getVarLenSeqTy(entityType, /*rank=*/loopNest.size()));
+ temp = insertSavedEntity(
+ region,
+ fir::factory::ArrayTemp{loc, builder, sequenceType, tempExtents,
+ tempLowerBounds,
+ /*lengths=*/{}, allocateOnHeap, tempName});
+ } else if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) {
+ mlir::Value loopExtent =
+ computeLoopNestIterationNumber(loc, builder, loopNest);
+ auto sequenceType =
+ mlir::cast<fir::SequenceType>(builder.getVarLenSeqTy(entityType));
+ temp = insertSavedEntity(region,
+ fir::factory::HomogeneousScalarStack{
+ loc, builder, sequenceType, loopExtent,
+ /*lenParams=*/{}, allocateOnHeap,
+ /*stackThroughLoops=*/true, tempName});
} else {
// If the number of iterations is not known, or if the values at each
// iteration are values that may have different shape, type parameters
diff --git a/flang/test/HLFIR/order_assignments/array-temp.fir b/flang/test/HLFIR/order_assignments/array-temp.fir
index 5510d0220397a..75d7b7a781127 100644
--- a/flang/test/HLFIR/order_assignments/array-temp.fir
+++ b/flang/test/HLFIR/order_assignments/array-temp.fir
@@ -110,3 +110,98 @@ func.func @nested_forall_2d(%arr: !fir.box<!fir.array<?x?xi32>>) {
// CHECK: }
// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?x?xi32>>
// CHECK: return
+
+// Forall with a negative unit step. The temp's lower bound must be the
+// loop's upper bound (the smaller end of the iv range), not the loop's
+// lower bound (which is the start, i.e. the largest iv value); otherwise
+// the loop induction variable falls below the temp's domain on every
+// iteration but the first.
+func.func @forall_negative_step(%arg0: !fir.ref<!fir.array<10xi32>>) {
+ %c-1 = arith.constant -1 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %c11 = arith.constant 11 : index
+ %0 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c10 : index
+ } ub {
+ hlfir.yield %c1 : index
+ } step {
+ hlfir.yield %c-1 : index
+ } (%i: index) {
+ hlfir.region_assign {
+ %rev = arith.subi %c11, %i : index
+ %addr = hlfir.designate %1#0 (%rev) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ %val = fir.load %addr : !fir.ref<i32>
+ hlfir.yield %val : i32
+ } to {
+ %addr = hlfir.designate %1#0 (%i) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ hlfir.yield %addr : !fir.ref<i32>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @forall_negative_step(
+// CHECK-NOT: fir.alloca index
+// The do_loop runs from the loop's lb (c10) down to its ub (c1) with step
+// c-1. The temp's lower bound is the loop's upper bound (c1), not its
+// lower bound, so all iv values [1..10] fit within the temp's domain.
+// CHECK: %[[C_NEG1:.*]] = arith.constant -1 : index
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[C10:.*]] = arith.constant 10 : index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %[[C1]], %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[IV0:.*]] = %[[C10]] to %[[C1]] step %[[C_NEG1]] {
+// CHECK: %[[ADDR0:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV0]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %{{.*}} to %[[ADDR0]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: fir.do_loop %[[IV1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[ADDR1:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV1]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[V:.*]] = fir.load %[[ADDR1]] : !fir.ref<i32>
+// CHECK: hlfir.assign %[[V]] to %{{.*}} : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: return
+
+// Forall with a non-unit step (step=2): the loop iv values are
+// non-contiguous (1, 3, 5, 7, 9), so the loop induction variable cannot
+// directly index a contiguous fir.shape_shift temp. We must fall back to
+// the 1D counter-based HomogeneousScalarStack.
+func.func @forall_non_unit_step(%arg0: !fir.ref<!fir.array<10xi32>>) {
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %c9 = arith.constant 9 : index
+ %c11 = arith.constant 11 : index
+ %0 = fir.shape %c9 : (index) -> !fir.shape<1>
+ %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c1 : index
+ } ub {
+ hlfir.yield %c9 : index
+ } step {
+ hlfir.yield %c2 : index
+ } (%i: index) {
+ hlfir.region_assign {
+ %rev = arith.subi %c11, %i : index
+ %addr = hlfir.designate %1#0 (%rev) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ %val = fir.load %addr : !fir.ref<i32>
+ hlfir.yield %val : i32
+ } to {
+ %addr = hlfir.designate %1#0 (%i) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ hlfir.yield %addr : !fir.ref<i32>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @forall_non_unit_step(
+// Counter-based HomogeneousScalarStack: a fir.alloca index counter and a
+// plain fir.shape (no shift), with the temp indexed through the counter.
+// CHECK: %[[CTR:.*]] = fir.alloca index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+// CHECK: hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.load %[[CTR]] : !fir.ref<index>
+// CHECK: arith.addi %{{.*}}, %{{.*}} : index
+// CHECK: fir.store %{{.*}} to %[[CTR]] : !fir.ref<index>
More information about the flang-commits
mailing list