[flang-commits] [flang] dc79e2a - [flang] avoid introducing iteration dependencies in WHERE and FORALL temporaries (#195053)
via flang-commits
flang-commits at lists.llvm.org
Mon May 4 01:50:19 PDT 2026
Author: jeanPerier
Date: 2026-05-04T10:50:15+02:00
New Revision: dc79e2a9a1c6809c940ef87c405ed8590768cceb
URL: https://github.com/llvm/llvm-project/commit/dc79e2a9a1c6809c940ef87c405ed8590768cceb
DIFF: https://github.com/llvm/llvm-project/commit/dc79e2a9a1c6809c940ef87c405ed8590768cceb.diff
LOG: [flang] avoid introducing iteration dependencies in WHERE and FORALL temporaries (#195053)
This patch improves how temporaries created for simple FORALL or WHERE constructs (such as the one below) are addressed, so that they do not introduce iteration dependencies.
```
subroutine foo(p1, p2, mask)
real, pointer :: p1(:), p2(:)
logical :: mask(:)
where (mask) p1 = p2
end subroutine
```
Instead of using a stack-like temporary that relies on a counter to push and fetch elements, the loop IVs are used directly to address the temporaries. This makes it easier to later vectorize or parallelize those loops.
This is only done when:
- This is not a FORALL with array expressions
- The dynamic type is the same at each iteration
- The WHERE and FORALL do not create loops of depth more than 15.
- If there are FORALLs, their strides are constants 1 or -1.
Note that only the addressing is impacted: the stack-like approach already allocated a temporary big enough for all the iterations regardless of the masking, so the temporary size remains the same.
Assisted by: Claude
Added:
flang/test/HLFIR/order_assignments/array-temp-many-forall.f90
flang/test/HLFIR/order_assignments/array-temp.fir
Modified:
flang/include/flang/Optimizer/Builder/TemporaryStorage.h
flang/lib/Optimizer/Builder/TemporaryStorage.cpp
flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
flang/test/HLFIR/order_assignments/impure-where.fir
flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir
flang/test/HLFIR/order_assignments/user-defined-assignment.fir
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
index cdb23a64c5c8a..e1edc5912ae97 100644
--- a/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
+++ b/flang/include/flang/Optimizer/Builder/TemporaryStorage.h
@@ -19,6 +19,7 @@
#ifndef FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H
#define FORTRAN_OPTIMIZER_BUILDER_TEMPORARYSTORAGE_H
+#include "flang/Common/idioms.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
namespace fir {
@@ -98,6 +99,34 @@ class HomogeneousScalarStack {
mlir::Value temp;
};
+/// Multidimensional temporary indexed directly by the enclosing loop induction
+/// variables (innermost loop is the first dimension). The indices passed to
+/// pushValue/fetch are interpreted in the array's domain, which is described
+/// by a fir.shape_shift built from the loop extents and lower bounds. This
+/// avoids the loop-carried counter used by HomogeneousScalarStack, keeping
+/// loop iterations independent. Limited to Fortran::common::maxRank dimensions.
+class ArrayTemp {
+public:
+ ArrayTemp(mlir::Location loc, fir::FirOpBuilder &builder,
+ fir::SequenceType declaredType, llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> lowerBounds,
+ llvm::ArrayRef<mlir::Value> lengths, bool allocateOnHeap,
+ llvm::StringRef name);
+
+ void pushValue(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::Value value, mlir::ValueRange indices);
+ void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder) {}
+ mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::ValueRange indices);
+ void destroy(mlir::Location loc, fir::FirOpBuilder &builder);
+ bool canBeFetchedAfterPush() const { return true; }
+
+private:
+ const bool allocateOnHeap;
+ mlir::Value temp;
+ llvm::SmallVector<mlir::Value> typeParams;
+};
+
/// Structure to hold the value of a single entity.
class SimpleCopy {
public:
@@ -255,16 +284,26 @@ class TemporaryStorage {
TemporaryStorage(T &&impl) : impl{std::forward<T>(impl)} {}
void pushValue(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::Value value) {
- std::visit([&](auto &temp) { temp.pushValue(loc, builder, value); }, impl);
+ mlir::Value value, mlir::ValueRange indices = {}) {
+ // Only ArrayTemp uses the loop indices; other temps don't take them.
+ std::visit(Fortran::common::visitors{
+ [&](ArrayTemp &temp) {
+ temp.pushValue(loc, builder, value, indices);
+ },
+ [&](auto &temp) { temp.pushValue(loc, builder, value); }},
+ impl);
}
void resetFetchPosition(mlir::Location loc, fir::FirOpBuilder &builder) {
std::visit([&](auto &temp) { temp.resetFetchPosition(loc, builder); },
impl);
}
- mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder) {
- return std::visit([&](auto &temp) { return temp.fetch(loc, builder); },
- impl);
+ mlir::Value fetch(mlir::Location loc, fir::FirOpBuilder &builder,
+ mlir::ValueRange indices = {}) {
+ return std::visit(
+ Fortran::common::visitors{
+ [&](ArrayTemp &temp) { return temp.fetch(loc, builder, indices); },
+ [&](auto &temp) { return temp.fetch(loc, builder); }},
+ impl);
}
void destroy(mlir::Location loc, fir::FirOpBuilder &builder) {
std::visit([&](auto &temp) { temp.destroy(loc, builder); }, impl);
@@ -282,8 +321,9 @@ class TemporaryStorage {
}
private:
- std::variant<HomogeneousScalarStack, SimpleCopy, SSARegister, AnyValueStack,
- AnyVariableStack, AnyVectorSubscriptStack, AnyAddressStack>
+ std::variant<HomogeneousScalarStack, ArrayTemp, SimpleCopy, SSARegister,
+ AnyValueStack, AnyVariableStack, AnyVectorSubscriptStack,
+ AnyAddressStack>
impl;
};
} // namespace fir::factory
diff --git a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
index 5db40aff91878..0233fc9f023de 100644
--- a/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
+++ b/flang/lib/Optimizer/Builder/TemporaryStorage.cpp
@@ -134,6 +134,88 @@ hlfir::Entity fir::factory::HomogeneousScalarStack::moveStackAsArrayExpr(
return hlfir::Entity{hlfirExpr};
}
+//===----------------------------------------------------------------------===//
+// fir::factory::ArrayTemp implementation.
+//===----------------------------------------------------------------------===//
+
+fir::factory::ArrayTemp::ArrayTemp(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ fir::SequenceType declaredType,
+ llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> lowerBounds,
+ llvm::ArrayRef<mlir::Value> lengths,
+ bool allocateOnHeap, llvm::StringRef name)
+ : allocateOnHeap{allocateOnHeap},
+ typeParams{lengths.begin(), lengths.end()} {
+ assert(extents.size() == lowerBounds.size() &&
+ "extents and lowerBounds must have the same size");
+ assert(extents.size() == declaredType.getDimension() &&
+ "declared type rank must match the number of extents");
+ mlir::Value tempStorage;
+ if (allocateOnHeap)
+ tempStorage =
+ builder.createHeapTemporary(loc, declaredType, name, extents, lengths);
+ else
+ tempStorage =
+ builder.createTemporary(loc, declaredType, name, extents, lengths);
+ // Use a fir.shape_shift so the temp's lower bounds match the loop bounds:
+ // the indices passed to pushValue/fetch can then index it directly.
+ mlir::Value shape = builder.genShape(loc, lowerBounds, extents);
+ temp =
+ hlfir::DeclareOp::create(builder, loc, tempStorage, name, shape, lengths)
+ .getBase();
+}
+
+/// Generate an hlfir.designate on \p temp for the element at \p indices. The
+/// indices are interpreted in the temp's array domain (matching its lower
+/// bounds, which were set from the enclosing loop bounds).
+static mlir::Value genArrayTempElementAddr(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ mlir::Value temp,
+ mlir::ValueRange indices,
+ mlir::ValueRange typeParams) {
+ hlfir::Entity entity{temp};
+ mlir::Type refTy = fir::ReferenceType::get(entity.getFortranElementType());
+ mlir::Type idxTy = builder.getIndexType();
+ llvm::SmallVector<mlir::Value> idxs;
+ idxs.reserve(indices.size());
+ for (mlir::Value idx : indices)
+ idxs.push_back(builder.createConvert(loc, idxTy, idx));
+ return hlfir::DesignateOp::create(builder, loc, refTy, temp, idxs,
+ typeParams);
+}
+
+void fir::factory::ArrayTemp::pushValue(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ mlir::Value value,
+ mlir::ValueRange indices) {
+ hlfir::Entity entity{value};
+ assert(entity.isScalar() && "cannot use ArrayTemp with array");
+ // Match HomogeneousScalarStack: derived types go through the runtime path.
+ if (!entity.hasIntrinsicType())
+ TODO(loc, "creating ArrayTemp for derived types");
+ mlir::Value addr =
+ genArrayTempElementAddr(loc, builder, temp, indices, typeParams);
+ hlfir::AssignOp::create(builder, loc, value, addr);
+}
+
+mlir::Value fir::factory::ArrayTemp::fetch(mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ mlir::ValueRange indices) {
+ mlir::Value addr =
+ genArrayTempElementAddr(loc, builder, temp, indices, typeParams);
+ return hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{addr});
+}
+
+void fir::factory::ArrayTemp::destroy(mlir::Location loc,
+ fir::FirOpBuilder &builder) {
+ if (allocateOnHeap) {
+ auto declare = temp.getDefiningOp<hlfir::DeclareOp>();
+ assert(declare && "temp must have been declared");
+ fir::FreeMemOp::create(builder, loc, declare.getMemref());
+ }
+}
+
//===----------------------------------------------------------------------===//
// fir::factory::SimpleCopy implementation.
//===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index a3fd19d95fbbc..5ad69c2b5cafe 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "ScheduleOrderedAssignments.h"
+#include "flang/Common/Fortran-consts.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/TemporaryStorage.h"
@@ -257,6 +258,11 @@ class OrderedAssignmentRewriter {
bool currentLoopNestIterationNumberCanBeComputed(
llvm::SmallVectorImpl<fir::DoLoopOp> &loopNest);
+ /// Return the induction variables of the enclosing fir.do_loop nest at the
+ /// current insertion point, innermost first (same order as
+ /// currentLoopNestIterationNumberCanBeComputed).
+ llvm::SmallVector<mlir::Value> getLoopIndices();
+
template <typename T>
fir::factory::TemporaryStorage *insertSavedEntity(mlir::Region &region,
T &&temp) {
@@ -669,7 +675,8 @@ OrderedAssignmentRewriter::getIfSaved(mlir::Region &region) {
// If the region was saved in a previous run, fetch the saved value.
if (auto temp = savedEntities.find(&region); temp != savedEntities.end()) {
doBeforeLoopNest([&]() { temp->second.resetFetchPosition(loc, builder); });
- return ValueAndCleanUp{temp->second.fetch(loc, builder), std::nullopt};
+ return ValueAndCleanUp{temp->second.fetch(loc, builder, getLoopIndices()),
+ std::nullopt};
}
return std::nullopt;
}
@@ -1109,6 +1116,61 @@ computeLoopNestIterationNumber(mlir::Location loc, fir::FirOpBuilder &builder,
return loopExtent;
}
+/// If \p value is a compile-time integer constant (possibly hidden behind
+/// fir.convert ops), return its value. Otherwise return std::nullopt.
+static std::optional<int64_t> unwrapConstantInt(mlir::Value value) {
+ while (auto convert = value.getDefiningOp<fir::ConvertOp>())
+ value = convert.getValue();
+ return fir::getIntIfConstant(value);
+}
+
+/// Compute the extents and lower bounds of \p loopNest, in the same order as
+/// \p loopNest (innermost first). The lower bound of each dimension is the
+/// smallest induction variable value, so that the loop induction variable
+/// can directly index the temp via fir.shape_shift. This only works when
+/// every loop has a unit step: for step +1 the smallest iv is the loop's
+/// lower bound; for step -1 it is the loop's upper bound. Returns false
+/// (with \p extents and \p lowerBounds left in an unspecified state) when
+/// any loop has a non-unit or non-constant step, signalling that the caller
+/// should fall back to a counter-based temp.
+static bool computeLoopNestExtentsAndLowerBounds(
+ mlir::Location loc, fir::FirOpBuilder &builder,
+ llvm::ArrayRef<fir::DoLoopOp> loopNest,
+ llvm::SmallVectorImpl<mlir::Value> &extents,
+ llvm::SmallVectorImpl<mlir::Value> &lowerBounds) {
+ extents.reserve(loopNest.size());
+ lowerBounds.reserve(loopNest.size());
+ for (fir::DoLoopOp doLoop : loopNest) {
+ auto step = unwrapConstantInt(doLoop.getStep());
+ if (!step || std::abs(*step) != 1)
+ return false;
+ mlir::Value extent = builder.genExtentFromTriplet(
+ loc, doLoop.getLowerBound(), doLoop.getUpperBound(), doLoop.getStep(),
+ builder.getIndexType());
+ extents.push_back(extent);
+ lowerBounds.push_back(*step == 1 ? doLoop.getLowerBound()
+ : doLoop.getUpperBound());
+ }
+ return true;
+}
+
+llvm::SmallVector<mlir::Value> OrderedAssignmentRewriter::getLoopIndices() {
+ llvm::SmallVector<mlir::Value> indices;
+ if (constructStack.empty())
+ return indices;
+ mlir::Operation *outerLoop = constructStack[0];
+ mlir::Operation *currentConstruct = constructStack.back();
+ while (currentConstruct) {
+ if (auto doLoop = mlir::dyn_cast<fir::DoLoopOp>(currentConstruct))
+ indices.push_back(doLoop.getInductionVar());
+ if (currentConstruct == outerLoop)
+ currentConstruct = nullptr;
+ else
+ currentConstruct = currentConstruct->getParentOp();
+ }
+ return indices;
+}
+
/// Return a name for temporary storage that indicates in which context
/// the temporary storage was created.
static llvm::StringRef
@@ -1160,11 +1222,27 @@ void OrderedAssignmentRewriter::generateSaveEntity(
bool loopShapeCanBePreComputed =
currentLoopNestIterationNumberCanBeComputed(loopNest);
doBeforeLoopNest([&] {
- /// For simple scalars inside loops whose total iteration number can be
- /// pre-computed, create a rank-1 array outside of the loops. It will be
- /// assigned/fetched inside the loops like a normal Fortran array given
- /// the iteration count.
- if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) {
+ // For simple scalars in a precomputable loop nest, prefer the
+ // multidimensional ArrayTemp (indexed by loop induction variables) so
+ // there is no loop-carried counter. Fall back to the 1D counter-based
+ // HomogeneousScalarStack when the nest is deeper than the maximum
+ // fir.array rank or when any loop has a non-unit/non-constant step
+ // (in which case the loop induction variable cannot index the temp
+ // directly).
+ llvm::SmallVector<mlir::Value> tempExtents;
+ llvm::SmallVector<mlir::Value> tempLowerBounds;
+ if (loopShapeCanBePreComputed && fir::isa_trivial(entityType) &&
+ loopNest.size() <= static_cast<size_t>(Fortran::common::maxRank) &&
+ computeLoopNestExtentsAndLowerBounds(loc, builder, loopNest,
+ tempExtents, tempLowerBounds)) {
+ auto sequenceType = mlir::cast<fir::SequenceType>(
+ builder.getVarLenSeqTy(entityType, /*rank=*/loopNest.size()));
+ temp = insertSavedEntity(
+ region,
+ fir::factory::ArrayTemp{loc, builder, sequenceType, tempExtents,
+ tempLowerBounds,
+ /*lengths=*/{}, allocateOnHeap, tempName});
+ } else if (loopShapeCanBePreComputed && fir::isa_trivial(entityType)) {
mlir::Value loopExtent =
computeLoopNestIterationNumber(loc, builder, loopNest);
auto sequenceType =
@@ -1174,7 +1252,6 @@ void OrderedAssignmentRewriter::generateSaveEntity(
loc, builder, sequenceType, loopExtent,
/*lenParams=*/{}, allocateOnHeap,
/*stackThroughLoops=*/true, tempName});
-
} else {
// If the number of iteration is not known, or if the values at each
// iterations are values that may have different shape, type parameters
@@ -1185,8 +1262,8 @@ void OrderedAssignmentRewriter::generateSaveEntity(
}
});
// Inside the loop nest (and any fir.if if there are active masks), copy
- // the value to the temp and do clean-ups for the value if any.
- temp->pushValue(loc, builder, entity);
+ // the value to the temp and do clean-ups of the value if any.
+ temp->pushValue(loc, builder, entity, getLoopIndices());
}
// Delay the clean-up if the entity will be used in the same run (i.e., the
diff --git a/flang/test/HLFIR/order_assignments/array-temp-many-forall.f90 b/flang/test/HLFIR/order_assignments/array-temp-many-forall.f90
new file mode 100644
index 0000000000000..0078cf4e5a446
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/array-temp-many-forall.f90
@@ -0,0 +1,45 @@
+! Test that the lower-hlfir-ordered-assignments pass falls back to the
+! 1D HomogeneousScalarStack temporary (counter-based) when the FORALL loop
+! nest is deeper than Fortran::common::maxRank (15), because fir.array can
+! only hold up to maxRank dimensions.
+!
+! Below maxRank, the new ArrayTemp is used and there is no counter; here we
+! verify the opposite: the counter (a fir.alloca index, fir.load/addi/store
+! pattern) is restored when the loop nest has 16 levels.
+!
+! The test uses a rank-8 array of derived type with a rank-8 array component
+! to spread 16 indexable dimensions across the FORALL header.
+!
+! RUN: bbc -emit-hlfir -o - %s | fir-opt --lower-hlfir-ordered-assignments | FileCheck %s
+
+module many_forall_mod
+ type :: t
+ real :: c(2,2,2,2,2,2,2,2)
+ end type
+contains
+ subroutine more_than_15_forall(a)
+ type(t), intent(inout) :: a(2,2,2,2,2,2,2,2)
+ forall (i1=1:2, i2=1:2, i3=1:2, i4=1:2, i5=1:2, i6=1:2, i7=1:2, i8=1:2, &
+ j1=1:2, j2=1:2, j3=1:2, j4=1:2, j5=1:2, j6=1:2, j7=1:2, j8=1:2)
+ a(i1,i2,i3,i4,i5,i6,i7,i8)%c(j1,j2,j3,j4,j5,j6,j7,j8) = &
+ a(3-i1,3-i2,3-i3,3-i4,3-i5,3-i6,3-i7,3-i8)%c(3-j1,3-j2,3-j3,3-j4,3-j5,3-j6,3-j7,3-j8)
+ end forall
+ end subroutine
+end module
+! With 16 nested loops, the temporary must be the 1D counter-based form
+! (HomogeneousScalarStack) instead of a 16D ArrayTemp, since fir.array is
+! limited to Fortran::common::maxRank dimensions.
+!
+! CHECK-LABEL: func.func @_QMmany_forall_modPmore_than_15_forall(
+! There must be a counter in memory (fir.alloca index).
+! CHECK: %[[CTR:.*]] = fir.alloca index
+! The temporary is a 1D fir.array<?xf32>.
+! CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+! Plain fir.shape (no shift), since the temp is indexed by the counter.
+! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+! CHECK: hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.heap<!fir.array<?xf32>>)
+! Inside the loop nest the counter is incremented and the temp is indexed
+! through the counter (not directly through the loop induction variables).
+! CHECK: fir.load %[[CTR]] : !fir.ref<index>
+! CHECK: arith.addi %{{.*}}, %{{.*}} : index
+! CHECK: fir.store %{{.*}} to %[[CTR]] : !fir.ref<index>
diff --git a/flang/test/HLFIR/order_assignments/array-temp.fir b/flang/test/HLFIR/order_assignments/array-temp.fir
new file mode 100644
index 0000000000000..75d7b7a781127
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/array-temp.fir
@@ -0,0 +1,207 @@
+// Test that hlfir.where/hlfir.forall temporary storages used for simple
+// scalar values are lowered to a multidimensional ArrayTemp directly indexed
+// by the enclosing loop induction variables (using hlfir.designate on a
+// fir.shape_shift) rather than to a 1D HomogeneousScalarStack with a counter.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+// Single-dimension case: the saved RHS is stored in a 1D ArrayTemp indexed
+// by the where loop induction variable, with a fir.shape_shift instead of a
+// fir.shape.
+func.func @where_self_overlap(%x: !fir.ref<!fir.array<10xi32>>, %mask: !fir.ref<!fir.array<10x!fir.logical<4>>>) {
+ %c-1 = arith.constant -1 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ hlfir.where {
+ hlfir.yield %mask : !fir.ref<!fir.array<10x!fir.logical<4>>>
+ } do {
+ hlfir.region_assign {
+ %2 = hlfir.designate %x (%c10:%c1:%c-1) shape %1 :
+(!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+ hlfir.yield %2 : !fir.ref<!fir.array<10xi32>>
+ } to {
+ hlfir.yield %x : !fir.ref<!fir.array<10xi32>>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @where_self_overlap(
+// CHECK-NOT: fir.alloca index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.where", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.where"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// First loop: save the RHS values, addressed by the loop induction variable.
+// CHECK: fir.do_loop %[[IV0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.if %{{.*}} {
+// CHECK-NOT: fir.load %{{.*}} : !fir.ref<index>
+// CHECK: %[[ADDR0:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV0]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %{{.*}} to %[[ADDR0]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// Second loop: read back from the temp using the new induction variable.
+// CHECK: fir.do_loop %[[IV1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.if %{{.*}} {
+// CHECK: %[[ADDR1:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV1]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL:.*]] = fir.load %[[ADDR1]] : !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL]] to %{{.*}} : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: return
+
+// Two nested forall loops with non-trivial lower bounds: the temp is a 2D
+// ArrayTemp whose first dimension corresponds to the inner forall and second
+// dimension to the outer one. The fir.shape_shift carries the forall lower
+// bounds, and hlfir.designate uses both induction variables (innermost first).
+func.func @nested_forall_2d(%arr: !fir.box<!fir.array<?x?xi32>>) {
+ %c2_i32 = arith.constant 2 : i32
+ %c5_i32 = arith.constant 5 : i32
+ %c3_i32 = arith.constant 3 : i32
+ %c7_i32 = arith.constant 7 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0:2 = hlfir.declare %arr {uniq_name = "x"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c2_i32 : i32
+ } ub {
+ hlfir.yield %c5_i32 : i32
+ } (%i: i32) {
+ hlfir.forall lb {
+ hlfir.yield %c3_i32 : i32
+ } ub {
+ hlfir.yield %c7_i32 : i32
+ } (%j: i32) {
+ hlfir.region_assign {
+ %i_idx = fir.convert %i : (i32) -> i64
+ %j_idx = fir.convert %j : (i32) -> i64
+ %addr = hlfir.designate %0#0 (%i_idx, %j_idx) : (!fir.box<!fir.array<?x?xi32>>, i64, i64) -> !fir.ref<i32>
+ %val = fir.load %addr : !fir.ref<i32>
+ hlfir.yield %val : i32
+ } to {
+ %i_inv = arith.subi %c5_i32, %i : i32
+ %i_inv2 = arith.addi %i_inv, %c2_i32 : i32
+ %j_inv = arith.subi %c7_i32, %j : i32
+ %j_inv2 = arith.addi %j_inv, %c3_i32 : i32
+ %i_idx = fir.convert %i_inv2 : (i32) -> i64
+ %j_idx = fir.convert %j_inv2 : (i32) -> i64
+ %addr = hlfir.designate %0#0 (%i_idx, %j_idx) : (!fir.box<!fir.array<?x?xi32>>, i64, i64) -> !fir.ref<i32>
+ hlfir.yield %addr : !fir.ref<i32>
+ }
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @nested_forall_2d(
+// CHECK-NOT: fir.alloca index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?x?xi32>, %{{.*}}, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (index, index, index, index) -> !fir.shapeshift<2>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?x?xi32>>, !fir.shapeshift<2>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.heap<!fir.array<?x?xi32>>)
+// CHECK: fir.do_loop %[[I0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.do_loop %[[J0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[ADDR0:.*]] = hlfir.designate %[[DECL]]#0 (%[[J0]], %[[I0]]) : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %{{.*}} to %[[ADDR0]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: fir.do_loop %[[I1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: fir.do_loop %[[J1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[ADDR1:.*]] = hlfir.designate %[[DECL]]#0 (%[[J1]], %[[I1]]) : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
+// CHECK: %[[V:.*]] = fir.load %[[ADDR1]] : !fir.ref<i32>
+// CHECK: hlfir.assign %[[V]] to %{{.*}} : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: }
+// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?x?xi32>>
+// CHECK: return
+
+// Forall with a negative unit step. The temp's lower bound must be the
+// loop's upper bound (the smaller end of the iv range), not the loop's
+// lower bound (which is the start, i.e. the largest iv value); otherwise
+// the loop induction variable falls below the temp's domain on every
+// iteration but the first.
+func.func @forall_negative_step(%arg0: !fir.ref<!fir.array<10xi32>>) {
+ %c-1 = arith.constant -1 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %c11 = arith.constant 11 : index
+ %0 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c10 : index
+ } ub {
+ hlfir.yield %c1 : index
+ } step {
+ hlfir.yield %c-1 : index
+ } (%i: index) {
+ hlfir.region_assign {
+ %rev = arith.subi %c11, %i : index
+ %addr = hlfir.designate %1#0 (%rev) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ %val = fir.load %addr : !fir.ref<i32>
+ hlfir.yield %val : i32
+ } to {
+ %addr = hlfir.designate %1#0 (%i) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ hlfir.yield %addr : !fir.ref<i32>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @forall_negative_step(
+// CHECK-NOT: fir.alloca index
+// The do_loop runs from the loop's lb (c10) down to its ub (c1) with step
+// c-1. The temp's lower bound is the loop's upper bound (c1), not its
+// lower bound, so all iv values [1..10] fit within the temp's domain.
+// CHECK: %[[C_NEG1:.*]] = arith.constant -1 : index
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[C10:.*]] = arith.constant 10 : index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %[[C1]], %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[IV0:.*]] = %[[C10]] to %[[C1]] step %[[C_NEG1]] {
+// CHECK: %[[ADDR0:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV0]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %{{.*}} to %[[ADDR0]] : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: fir.do_loop %[[IV1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: %[[ADDR1:.*]] = hlfir.designate %[[DECL]]#0 (%[[IV1]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[V:.*]] = fir.load %[[ADDR1]] : !fir.ref<i32>
+// CHECK: hlfir.assign %[[V]] to %{{.*}} : i32, !fir.ref<i32>
+// CHECK: }
+// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: return
+
+// Forall with a non-unit step (step=2): the loop iv values are
+// non-contiguous (1, 3, 5, 7, 9), so the loop induction variable cannot
+// directly index a contiguous fir.shape_shift temp. We must fall back to
+// the 1D counter-based HomogeneousScalarStack.
+func.func @forall_non_unit_step(%arg0: !fir.ref<!fir.array<10xi32>>) {
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %c9 = arith.constant 9 : index
+ %c11 = arith.constant 11 : index
+ %0 = fir.shape %c9 : (index) -> !fir.shape<1>
+ %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+ hlfir.forall lb {
+ hlfir.yield %c1 : index
+ } ub {
+ hlfir.yield %c9 : index
+ } step {
+ hlfir.yield %c2 : index
+ } (%i: index) {
+ hlfir.region_assign {
+ %rev = arith.subi %c11, %i : index
+ %addr = hlfir.designate %1#0 (%rev) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ %val = fir.load %addr : !fir.ref<i32>
+ hlfir.yield %val : i32
+ } to {
+ %addr = hlfir.designate %1#0 (%i) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ hlfir.yield %addr : !fir.ref<i32>
+ }
+ }
+ return
+}
+// CHECK-LABEL: func.func @forall_non_unit_step(
+// Counter-based HomogeneousScalarStack: a fir.alloca index counter and a
+// plain fir.shape (no shift), with the temp indexed through the counter.
+// CHECK: %[[CTR:.*]] = fir.alloca index
+// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+// CHECK: hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.load %[[CTR]] : !fir.ref<index>
+// CHECK: arith.addi %{{.*}}, %{{.*}} : index
+// CHECK: fir.store %{{.*}} to %[[CTR]] : !fir.ref<index>
diff --git a/flang/test/HLFIR/order_assignments/impure-where.fir b/flang/test/HLFIR/order_assignments/impure-where.fir
index 011a486b2baf7..c26b007e1973f 100644
--- a/flang/test/HLFIR/order_assignments/impure-where.fir
+++ b/flang/test/HLFIR/order_assignments/impure-where.fir
@@ -38,22 +38,22 @@ func.func @test_elsewhere_impure_mask(%x: !fir.ref<!fir.array<10xi32>>, %y: !fir
// CHECK-LABEL: func.func @test_elsewhere_impure_mask(
// CHECK: %[[VAL_12:.*]] = fir.call @impure() : () -> !fir.heap<!fir.array<10x!fir.logical<4>>>
// CHECK: %[[VAL_21:.*]] = fir.allocmem !fir.array<?x!fir.logical<4>>, %[[extent:[^ ]*]]
-// CHECK: %[[VAL_22:.*]] = fir.shape %[[extent]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%{{.*}}) {uniq_name = ".tmp.where"}
-// CHECK: fir.do_loop
+// CHECK: %[[SHAPE:.*]] = fir.shape_shift %{{.*}}, %[[extent]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_21]](%[[SHAPE]]) {uniq_name = ".tmp.where"}
+// CHECK: fir.do_loop %[[IV0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: fir.if {{.*}} {
// CHECK: } else {
// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]] (%{{.*}})
// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[IV0]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
// CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_32]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
// CHECK: }
// CHECK: }
// CHECK-NOT: fir.call @impure
-// CHECK: fir.do_loop
+// CHECK: fir.do_loop %[[IV1:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: fir.if {{.*}} {
// CHECK: } else {
-// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[IV1]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1
// CHECK: fir.if %[[VAL_44]] {
@@ -61,10 +61,10 @@ func.func @test_elsewhere_impure_mask(%x: !fir.ref<!fir.array<10xi32>>, %y: !fir
// CHECK: }
// CHECK: }
// CHECK-NOT: fir.call @impure
-// CHECK: fir.do_loop
+// CHECK: fir.do_loop %[[IV2:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: fir.if {{.*}} {
// CHECK: } else {
-// CHECK: %[[VAL_52:.*]] = hlfir.designate %[[VAL_23]]#0 (%{{.*}}) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_52:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[IV2]]) : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_53:.*]] = fir.load %[[VAL_52]] : !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (!fir.logical<4>) -> i1
// CHECK: fir.if %[[VAL_54]] {
diff --git a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
index 064b12b9ed812..6eac74e23053e 100644
--- a/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
+++ b/flang/test/HLFIR/order_assignments/inlined-stack-temp.fir
@@ -27,53 +27,42 @@ func.func @test_scalar_save(%arg0: !fir.box<!fir.array<?xi32>>) {
}
// CHECK-LABEL: func.func @test_scalar_save(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
-// CHECK: %[[VAL_1:.*]] = fir.alloca index
-// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i32
-// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
-// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
-// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
-// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index
-// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index
-// CHECK: %[[VAL_11:.*]] = arith.divsi %[[VAL_10]], %[[VAL_7]] : index
-// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_8]] : index
-// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_8]] : index
-// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_15:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_16:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_13]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]](%[[VAL_17]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
-// CHECK: fir.do_loop %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_7]] {
-// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i32
-// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
-// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_21]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
-// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_15]] : index
-// CHECK: fir.store %[[VAL_25]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_24]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_26]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_1:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_1]] : (i32) -> index
+// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_5]], %[[VAL_4]] : index
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_6]] : index
+// CHECK: %[[VAL_10:.*]] = arith.divsi %[[VAL_9]], %[[VAL_6]] : index
+// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_7]] : index
+// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_7]] : index
+// CHECK: %[[VAL_13:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_12]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[VAL_14:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_12]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]](%[[VAL_14]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_16:.*]] = %[[VAL_4]] to %[[VAL_5]] step %[[VAL_6]] {
+// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (index) -> i32
+// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> i64
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_18]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<i32>
+// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_16]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_21]] : i32, !fir.ref<i32>
// CHECK: }
-// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
-// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
-// CHECK: %[[VAL_29:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_14]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_27]] to %[[VAL_28]] step %[[VAL_29]] {
-// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i32
-// CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_15]] : index
-// CHECK: fir.store %[[VAL_33]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_32]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
-// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_31]], %[[VAL_3]] : i32
-// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> i64
-// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_37]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_38]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_1]] : (i32) -> index
+// CHECK: %[[VAL_24:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_25:.*]] = %[[VAL_22]] to %[[VAL_23]] step %[[VAL_24]] {
+// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (index) -> i32
+// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_25]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
+// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_26]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> i64
+// CHECK: %[[VAL_31:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_30]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_31]] : i32, !fir.ref<i32>
// CHECK: }
-// CHECK: fir.freemem %[[VAL_16]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: fir.freemem %[[VAL_13]] : !fir.heap<!fir.array<?xi32>>
// CHECK: return
// CHECK: }
@@ -111,87 +100,65 @@ func.func @mask_and_rhs_conflict(%arg0: !fir.box<!fir.array<?xi32>>) {
}
// CHECK-LABEL: func.func @mask_and_rhs_conflict(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
-// CHECK: %[[VAL_1:.*]] = fir.alloca index
-// CHECK: %[[VAL_2:.*]] = fir.alloca index
-// CHECK: %[[VAL_3:.*]] = arith.constant 42 : i32
-// CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32
-// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
-// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
-// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
-// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_10:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index
-// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_9]] : index
-// CHECK: %[[VAL_13:.*]] = arith.divsi %[[VAL_12]], %[[VAL_9]] : index
-// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_10]] : index
-// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_13]], %[[VAL_10]] : index
-// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array<?xi1>, %[[VAL_15]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_19]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
-// CHECK: %[[VAL_21:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_8]], %[[VAL_7]] : index
-// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_22]], %[[VAL_9]] : index
-// CHECK: %[[VAL_24:.*]] = arith.divsi %[[VAL_23]], %[[VAL_9]] : index
-// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_24]], %[[VAL_21]] : index
-// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_24]], %[[VAL_21]] : index
-// CHECK: %[[VAL_27:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_28:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_26]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_30:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%[[VAL_30]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
-// CHECK: fir.do_loop %[[VAL_32:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_9]] {
-// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i32
-// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64
-// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_34]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<i32>
-// CHECK: %[[VAL_37:.*]] = arith.cmpi sgt, %[[VAL_36]], %[[VAL_3]] : i32
-// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_17]] : index
-// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
-// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_40]] : i1, !fir.ref<i1>
-// CHECK: fir.if %[[VAL_37]] {
-// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_33]] : (i32) -> i64
-// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_41]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<i32>
-// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_44]], %[[VAL_28]] : index
-// CHECK: fir.store %[[VAL_45]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_44]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_46]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_1:.*]] = arith.constant 42 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
+// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index
+// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index
+// CHECK: %[[VAL_11:.*]] = arith.divsi %[[VAL_10]], %[[VAL_7]] : index
+// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[MASK_TEMP:.*]] = fir.allocmem !fir.array<?xi1>, %[[VAL_13]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[MASK_SHAPE:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_13]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[MASK_DECL:.*]]:2 = hlfir.declare %[[MASK_TEMP]](%[[MASK_SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
+// CHECK: %[[VAL_16:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_17:.*]] = arith.subi %[[VAL_6]], %[[VAL_5]] : index
+// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_17]], %[[VAL_7]] : index
+// CHECK: %[[VAL_19:.*]] = arith.divsi %[[VAL_18]], %[[VAL_7]] : index
+// CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_16]] : index
+// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_19]], %[[VAL_16]] : index
+// CHECK: %[[RHS_TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_21]] {bindc_name = ".tmp.forall", uniq_name = ""}
+// CHECK: %[[RHS_SHAPE:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_21]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[RHS_DECL:.*]]:2 = hlfir.declare %[[RHS_TEMP]](%[[RHS_SHAPE]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_24:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_7]] {
+// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (index) -> i32
+// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i32) -> i64
+// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_26]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
+// CHECK: %[[VAL_29:.*]] = arith.cmpi sgt, %[[VAL_28]], %[[VAL_1]] : i32
+// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[MASK_DECL]]#0 (%[[VAL_24]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_30]] : i1, !fir.ref<i1>
+// CHECK: fir.if %[[VAL_29]] {
+// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_25]] : (i32) -> i64
+// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_31]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[RHS_DECL]]#0 (%[[VAL_24]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_33]] to %[[VAL_34]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
-// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
-// CHECK: %[[VAL_49:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_16]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: fir.store %[[VAL_27]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: fir.do_loop %[[VAL_50:.*]] = %[[VAL_47]] to %[[VAL_48]] step %[[VAL_49]] {
-// CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (index) -> i32
-// CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_17]] : index
-// CHECK: fir.store %[[VAL_53]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_54:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_52]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
-// CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_54]] : !fir.ref<i1>
-// CHECK: fir.if %[[VAL_55]] {
-// CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_56]], %[[VAL_28]] : index
-// CHECK: fir.store %[[VAL_57]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[VAL_56]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_59:.*]] = fir.load %[[VAL_58]] : !fir.ref<i32>
-// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_51]], %[[VAL_5]] : i32
-// CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (i32) -> i64
-// CHECK: %[[VAL_62:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_61]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_59]] to %[[VAL_62]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_3]] : (i32) -> index
+// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_2]] : (i32) -> index
+// CHECK: %[[VAL_37:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_38:.*]] = %[[VAL_35]] to %[[VAL_36]] step %[[VAL_37]] {
+// CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i32
+// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[MASK_DECL]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<i1>
+// CHECK: fir.if %[[VAL_41]] {
+// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[RHS_DECL]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref<i32>
+// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_39]], %[[VAL_3]] : i32
+// CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_45]]) : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_46]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK-DAG: fir.freemem %[[VAL_18]] : !fir.heap<!fir.array<?xi1>>
-// CHECK-DAG: fir.freemem %[[VAL_29]] : !fir.heap<!fir.array<?xi32>>
+// CHECK-DAG: fir.freemem %[[MASK_TEMP]] : !fir.heap<!fir.array<?xi1>>
+// CHECK-DAG: fir.freemem %[[RHS_TEMP]] : !fir.heap<!fir.array<?xi32>>
// CHECK: return
// CHECK: }
@@ -224,6 +191,8 @@ func.func @test_where_mask_save(%arg0: !fir.box<!fir.array<?xi32>>) {
}
return
}
+// This case uses hlfir.associate (SimpleCopy) to save the whole mask outside
+// of the where loop nest, which is unrelated to the new ArrayTemp path.
// CHECK-LABEL: func.func @test_where_mask_save(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>) {
// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
@@ -278,59 +247,48 @@ func.func @test_where_rhs_save(%x: !fir.ref<!fir.array<10xi32>>, %mask: !fir.ref
// CHECK-LABEL: func.func @test_where_rhs_save(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>,
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>>) {
-// CHECK: %[[VAL_2:.*]] = fir.alloca index
-// CHECK: %[[VAL_3:.*]] = arith.constant -1 : index
-// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_5:.*]] = arith.constant 10 : index
-// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_7:.*]] = arith.constant 10 : index
-// CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_11:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_7]], %[[VAL_9]] : index
-// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_9]] : index
-// CHECK: %[[VAL_14:.*]] = arith.divsi %[[VAL_13]], %[[VAL_9]] : index
-// CHECK: %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_11]] : index
-// CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_11]] : index
-// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_19:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_16]] {bindc_name = ".tmp.where", uniq_name = ""}
-// CHECK: %[[VAL_20:.*]] = fir.shape %[[VAL_16]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_19]](%[[VAL_20]]) {uniq_name = ".tmp.where"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
-// CHECK: fir.do_loop %[[VAL_22:.*]] = %[[VAL_9]] to %[[VAL_7]] step %[[VAL_9]] {
-// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_22]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.logical<4>) -> i1
-// CHECK: fir.if %[[VAL_25]] {
-// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_5]]:%[[VAL_4]]:%[[VAL_3]]) shape %[[VAL_6]] : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
-// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_22]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<i32>
-// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_18]] : index
-// CHECK: fir.store %[[VAL_29]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_28]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_30]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_2:.*]] = arith.constant -1 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_6:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_10:.*]] = arith.subi %[[VAL_6]], %[[VAL_8]] : index
+// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_8]] : index
+// CHECK: %[[VAL_12:.*]] = arith.divsi %[[VAL_11]], %[[VAL_8]] : index
+// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_9]] : index
+// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_9]] : index
+// CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_14]] {bindc_name = ".tmp.where", uniq_name = ""}
+// CHECK: %[[VAL_16:.*]] = fir.shape_shift %[[VAL_8]], %[[VAL_14]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]](%[[VAL_16]]) {uniq_name = ".tmp.where"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_8]] {
+// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_18]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_21]] {
+// CHECK: %[[SLICE:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_4]]:%[[VAL_3]]:%[[VAL_2]]) shape %[[VAL_5]] : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+// CHECK: %[[VAL_22:.*]] = hlfir.designate %[[SLICE]] (%[[VAL_18]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+// CHECK: %[[VAL_24:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_18]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_24]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK: %[[VAL_31:.*]] = arith.constant 10 : index
-// CHECK: %[[VAL_32:.*]] = fir.shape %[[VAL_31]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_33:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_17]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_33]] to %[[VAL_31]] step %[[VAL_33]] {
-// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_34]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.logical<4>>
-// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1
-// CHECK: fir.if %[[VAL_37]] {
-// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_38]], %[[VAL_18]] : index
-// CHECK: fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref<index>
-// CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_21]]#0 (%[[VAL_38]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref<i32>
-// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
-// CHECK: hlfir.assign %[[VAL_41]] to %[[VAL_42]] : i32, !fir.ref<i32>
+// CHECK: %[[VAL_25:.*]] = arith.constant 10 : index
+// CHECK: %[[VAL_26:.*]] = fir.shape %[[VAL_25]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_27:.*]] = arith.constant 1 : index
+// CHECK: fir.do_loop %[[VAL_28:.*]] = %[[VAL_27]] to %[[VAL_25]] step %[[VAL_27]] {
+// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_28]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref<!fir.logical<4>>
+// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
+// CHECK: fir.if %[[VAL_31]] {
+// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_28]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_28]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_33]] to %[[VAL_34]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
-// CHECK: fir.freemem %[[VAL_19]] : !fir.heap<!fir.array<?xi32>>
+// CHECK: fir.freemem %[[VAL_15]] : !fir.heap<!fir.array<?xi32>>
// CHECK: return
// CHECK: }
diff --git a/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir b/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir
index 1eb86d09a39c1..3fe6ab7e087ad 100644
--- a/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir
+++ b/flang/test/HLFIR/order_assignments/saving-mask-and-rhs.fir
@@ -42,13 +42,14 @@ func.func @saving_mask_and_rhs(%arg0: !fir.ref<!fir.array<10xi32>>) {
// Creating RHS temporary using the mask temporary (and not the hlfir.elemental)
// CHECK: %[[VAL_25:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.where", uniq_name = ""}
-// CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_25]]({{.*}}) {uniq_name = ".tmp.where"}
-// CHECK: fir.do_loop
+// CHECK: %[[RHS_SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_25]](%[[RHS_SHAPE]]) {uniq_name = ".tmp.where"}
+// CHECK: fir.do_loop %[[IV:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_14]]#0 ({{.*}})
// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref<!fir.logical<4>>
// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1
// CHECK: fir.if %[[VAL_31]] {
-// CHECK: %[[VAL_36:.*]] = hlfir.designate %[[VAL_27]]#0 ({{.*}})
+// CHECK: %[[VAL_36:.*]] = hlfir.designate %[[VAL_27]]#0 (%[[IV]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %{{.*}} to %[[VAL_36]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
@@ -91,14 +92,17 @@ func.func @forall_mask_and_rhs(%arg0: !fir.ref<!fir.array<10xi32>>) {
// CHECK-LABEL: func.func @forall_mask_and_rhs(
// CHECK: %[[VAL_18:.*]] = fir.allocmem !fir.array<?xi1>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%{{.*}}) {uniq_name = ".tmp.forall"}
+// CHECK: %[[MASK_SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[MASK_SHAPE]]) {uniq_name = ".tmp.forall"}
// CHECK: %[[VAL_29:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%{{.*}}) {uniq_name = ".tmp.forall"}
+// CHECK: %[[RHS_SHAPE:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_29]](%[[RHS_SHAPE]]) {uniq_name = ".tmp.forall"}
+// CHECK: fir.do_loop %[[IV:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[VAL_36:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32
-// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_20]]#0 (%{{.*}})
+// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[IV]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
// CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_39]] : i1, !fir.ref<i1>
// CHECK: fir.if %[[VAL_36]] {
-// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_31]]#0 (%{{.*}})
+// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_31]]#0 (%[[IV]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %{{.*}} to %[[VAL_45]] : i32, !fir.ref<i32>
// CHECK: }
// CHECK: }
diff --git a/flang/test/HLFIR/order_assignments/user-defined-assignment.fir b/flang/test/HLFIR/order_assignments/user-defined-assignment.fir
index e64b3ef362cee..83411279d0731 100644
--- a/flang/test/HLFIR/order_assignments/user-defined-assignment.fir
+++ b/flang/test/HLFIR/order_assignments/user-defined-assignment.fir
@@ -138,7 +138,6 @@ func.func @test_scalar_forall_overlap(%i: !fir.ref<!fir.array<10xi32>>) {
}
// CHECK-LABEL: func.func @test_scalar_forall_overlap(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>) {
-// CHECK: %[[VAL_1:.*]] = fir.alloca index
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index
@@ -150,30 +149,20 @@ func.func @test_scalar_forall_overlap(%i: !fir.ref<!fir.array<10xi32>>) {
// CHECK: %[[VAL_10:.*]] = arith.divsi %[[VAL_9]], %[[VAL_6]] : index
// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_7]] : index
// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_7]] : index
-// CHECK: %[[VAL_13:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_13]] to %[[VAL_1]] : !fir.ref<index>
// CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?xi1>, %[[VAL_12]] {bindc_name = ".tmp.forall", uniq_name = ""}
-// CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1>
-// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]](%[[VAL_16]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
+// CHECK: %[[VAL_16:.*]] = fir.shape_shift %[[VAL_3]], %[[VAL_12]] : (index, index) -> !fir.shapeshift<1>
+// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]](%[[VAL_16]]) {uniq_name = ".tmp.forall"} : (!fir.heap<!fir.array<?xi1>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi1>>, !fir.heap<!fir.array<?xi1>>)
// CHECK: fir.do_loop %[[VAL_18:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_5]], %[[VAL_18]] : index
// CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<i32>
// CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_21]], %[[VAL_2]] : i32
-// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_14]] : index
-// CHECK: fir.store %[[VAL_24]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_25:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_23]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: %[[VAL_25:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_18]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
// CHECK: hlfir.assign %[[VAL_22]] to %[[VAL_25]] : i1, !fir.ref<i1>
// CHECK: }
// CHECK: %[[VAL_26:.*]] = arith.constant 1 : index
-// CHECK: fir.store %[[VAL_13]] to %[[VAL_1]] : !fir.ref<index>
// CHECK: fir.do_loop %[[VAL_27:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_26]] {
-// CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_14]] : index
-// CHECK: fir.store %[[VAL_29]] to %[[VAL_1]] : !fir.ref<index>
-// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_28]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
+// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_27]]) : (!fir.box<!fir.array<?xi1>>, index) -> !fir.ref<i1>
// CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_30]] : !fir.ref<i1>
// CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_27]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_31]] : (i1) -> !fir.logical<4>
More information about the flang-commits
mailing list