[flang-commits] [flang] 9edf0e7 - [flang] improve array section analysis for WHERE (#194399)
via flang-commits
flang-commits at lists.llvm.org
Tue Apr 28 01:14:19 PDT 2026
Author: jeanPerier
Date: 2026-04-28T10:14:14+02:00
New Revision: 9edf0e73b59540ccec25b355268ebff6d3bba4ef
URL: https://github.com/llvm/llvm-project/commit/9edf0e73b59540ccec25b355268ebff6d3bba4ef
DIFF: https://github.com/llvm/llvm-project/commit/9edf0e73b59540ccec25b355268ebff6d3bba4ef.diff
LOG: [flang] improve array section analysis for WHERE (#194399)
The array section analysis in the HLFIR pass in charge of WHERE lowering
was unable to tell that the LHS and RHS are the same array section when
the base is an assumed shape or when variables are used as indices.
This patch adds an optional callback to the array section
analysis to tell if two SSA values have the same value. This callback
is then implemented to report that two SSA values are the same only if
they are the results of equivalent operations with no memory effects (they
may be non-speculatable) and with operands that have the same value
(recursively), or if they are loads from the same variable (which is
OK in the context of WHERE RHS/LHS thanks to Fortran 2023 10.1.4, which
guarantees that a variable referenced on both the RHS and LHS cannot be
modified by side effects in the RHS/LHS).
Assisted by: Claude
Added:
flang/test/HLFIR/order_assignments/where-equivalent-subscripts.fir
Modified:
flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h
flang/lib/Optimizer/Analysis/ArraySectionAnalyzer.cpp
flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h b/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h
index 0a9ff13e30525..e87e37c3c5590 100644
--- a/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h
+++ b/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h
@@ -11,6 +11,7 @@
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
namespace mlir {
class Operation;
@@ -38,13 +39,28 @@ class ArraySectionAnalyzer {
EitherIdenticalOrDisjoint
};
+ /// Optional callback used to teach the analyzer that two SSA values, even
+ /// though they are not the same SSA value, are known by the caller to
+ /// evaluate to the same value at runtime.
+ /// The callback may be called with null values, in which case it must
+ /// return false.
+ using ValueEquivalenceCallback =
+ llvm::function_ref<bool(mlir::Value, mlir::Value)>;
+
// Analyzes two hlfir.designate results and returns the overlap kind.
// The callers may use this method when the alias analysis reports
// an alias of some kind, so that we can run Fortran specific analysis
// on the array slices to see if they are identical or disjoint.
// Note that the alias analysis are not able to give such an answer
// about the references.
- static SlicesOverlapKind analyze(mlir::Value ref1, mlir::Value ref2);
+ //
+ // \p areKnownEquivalent is an optional callback that the analyzer may use
+ // to recognize section subscripts that have the same value even when they
+ // are not the same SSA value (e.g., subscripts that are loads of the same
+ // Fortran variable from two different regions of the same WHERE construct).
+ static SlicesOverlapKind
+ analyze(mlir::Value ref1, mlir::Value ref2,
+ ValueEquivalenceCallback areKnownEquivalent = nullptr);
static bool isDesignatingArrayInOrder(hlfir::DesignateOp designate,
hlfir::ElementalOpInterface elemental);
@@ -108,8 +124,13 @@ class ArraySectionAnalyzer {
// These sections are identical, from the point of which array
// elements are being addresses, even though the shape
// of the array slices might be different.
- static bool areIdenticalSections(const SectionDesc &desc1,
- const SectionDesc &desc2);
+ //
+ // When \p areKnownEquivalent is provided, it is used to compare the
+ // individual section components (lower bound, upper bound and stride)
+ // when they are not the same SSA value.
+ static bool
+ areIdenticalSections(const SectionDesc &desc1, const SectionDesc &desc2,
+ ValueEquivalenceCallback areKnownEquivalent = nullptr);
// Return true, if v1 is known to be less than v2.
static bool isLess(mlir::Value v1, mlir::Value v2);
diff --git a/flang/lib/Optimizer/Analysis/ArraySectionAnalyzer.cpp b/flang/lib/Optimizer/Analysis/ArraySectionAnalyzer.cpp
index f5ee298f0948c..9def133da3a52 100644
--- a/flang/lib/Optimizer/Analysis/ArraySectionAnalyzer.cpp
+++ b/flang/lib/Optimizer/Analysis/ArraySectionAnalyzer.cpp
@@ -79,15 +79,29 @@ bool ArraySectionAnalyzer::areDisjointSections(const SectionDesc &desc1,
return false;
}
-bool ArraySectionAnalyzer::areIdenticalSections(const SectionDesc &desc1,
- const SectionDesc &desc2) {
+bool ArraySectionAnalyzer::areIdenticalSections(
+ const SectionDesc &desc1, const SectionDesc &desc2,
+ ValueEquivalenceCallback areKnownEquivalent) {
if (desc1 == desc2)
return true;
- return false;
+ if (!areKnownEquivalent)
+ return false;
+ // Compare each component, falling back on the user-provided callback when
+ // the SSA values differ. Null values must compare equal to null only.
+ auto valuesMatch = [&](mlir::Value v1, mlir::Value v2) {
+ if (v1 == v2)
+ return true;
+ if (!v1 || !v2)
+ return false;
+ return areKnownEquivalent(v1, v2);
+ };
+ return valuesMatch(desc1.lb, desc2.lb) && valuesMatch(desc1.ub, desc2.ub) &&
+ valuesMatch(desc1.stride, desc2.stride);
}
ArraySectionAnalyzer::SlicesOverlapKind
-ArraySectionAnalyzer::analyze(mlir::Value ref1, mlir::Value ref2) {
+ArraySectionAnalyzer::analyze(mlir::Value ref1, mlir::Value ref2,
+ ValueEquivalenceCallback areKnownEquivalent) {
if (ref1 == ref2)
return SlicesOverlapKind::DefinitelyIdentical;
@@ -138,7 +152,7 @@ ArraySectionAnalyzer::analyze(mlir::Value ref1, mlir::Value ref2) {
if (areDisjointSections(desc1, desc2))
return SlicesOverlapKind::DefinitelyDisjoint;
- if (!areIdenticalSections(desc1, desc2)) {
+ if (!areIdenticalSections(desc1, desc2, areKnownEquivalent)) {
if (isTriplet1 || isTriplet2) {
// For example:
// hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
index 6bc5317b25d7a..7de549e38c9d6 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp
@@ -11,7 +11,9 @@
#include "flang/Optimizer/Analysis/ArraySectionAnalyzer.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "mlir/IR/OperationSupport.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/Debug.h"
@@ -178,8 +180,9 @@ namespace {
/// assignment evaluation.
class Scheduler {
public:
- Scheduler(bool tryFusingAssignments)
- : tryFusingAssignments{tryFusingAssignments} {}
+ Scheduler(hlfir::OrderedAssignmentTreeOpInterface root,
+ bool tryFusingAssignments)
+ : root{root}, tryFusingAssignments{tryFusingAssignments} {}
/// Start scheduling an assignment. Gather the write side effect from the
/// assignment.
@@ -245,6 +248,15 @@ class Scheduler {
/// effects conflicting with the previous run.
bool canFuseAssignmentWithPreviousRun();
+ /// Tell if \p v1 and \p v2 are guaranteed to evaluate to the same value at
+ /// runtime, used by the ArraySectionAnalyzer to recognize identical
+ /// sections whose subscripts are not the same SSA value (e.g. when CSE
+ /// could not merge loads across the LHS and RHS regions of a WHERE).
+ bool haveTheSameValue(mlir::Value v1, mlir::Value v2);
+
+ /// Root of the ordered assignment tree being scheduled.
+ hlfir::OrderedAssignmentTreeOpInterface root;
+
/// Memory effects of the assignments being lowered.
llvm::SmallVector<hlfir::DetailedEffectInstance> assignEffects;
/// Memory effects of the evaluations implied by the assignments
@@ -514,10 +526,12 @@ struct ConflictKind {
/// Could there be any read or write in effectsA on a variable written to in
/// effectsB?
-static ConflictKind
-anyRAWorWAW(llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsA,
- llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsB,
- fir::AliasAnalysis &aliasAnalysis) {
+static ConflictKind anyRAWorWAW(
+ llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsA,
+ llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsB,
+ fir::AliasAnalysis &aliasAnalysis,
+ fir::ArraySectionAnalyzer::ValueEquivalenceCallback areKnownEquivalent =
+ nullptr) {
ConflictKind result = ConflictKind::none();
for (const auto &effectB : effectsB)
if (mlir::isa<mlir::MemoryEffects::Write>(effectB.getEffect())) {
@@ -544,7 +558,8 @@ anyRAWorWAW(llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsA,
writtenVarB, /*isAligned=*/true));
continue;
}
- auto overlap = fir::ArraySectionAnalyzer::analyze(arrayA, arrayB);
+ auto overlap = fir::ArraySectionAnalyzer::analyze(
+ arrayA, arrayB, areKnownEquivalent);
if (overlap == fir::ArraySectionAnalyzer::SlicesOverlapKind::
DefinitelyDisjoint)
continue;
@@ -572,15 +587,19 @@ anyRAWorWAW(llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsA,
/// Could there be any read or write in effectsA on a variable written to in
/// effectsB, or any read in effectsB on a variable written to in effectsA?
-static ConflictKind
-conflict(llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsA,
- llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsB) {
+static ConflictKind conflict(
+ llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsA,
+ llvm::ArrayRef<hlfir::DetailedEffectInstance> effectsB,
+ fir::ArraySectionAnalyzer::ValueEquivalenceCallback areKnownEquivalent =
+ nullptr) {
fir::AliasAnalysis aliasAnalysis;
// (RAW || WAW) || (WAR || WAW).
- ConflictKind result = anyRAWorWAW(effectsA, effectsB, aliasAnalysis);
+ ConflictKind result =
+ anyRAWorWAW(effectsA, effectsB, aliasAnalysis, areKnownEquivalent);
if (result.isAny())
return result;
- return result || anyRAWorWAW(effectsB, effectsA, aliasAnalysis);
+ return result ||
+ anyRAWorWAW(effectsB, effectsA, aliasAnalysis, areKnownEquivalent);
}
/// Could there be any write effects in "effects" affecting memory storages
@@ -607,6 +626,83 @@ anyNonLocalWrite(llvm::ArrayRef<hlfir::DetailedEffectInstance> effects,
// Scheduling Implementation : Scheduler class implementation
//===----------------------------------------------------------------------===//
+/// Return the closest enclosing hlfir.region_assign of \p op if \p op lives
+/// in its rhs or lhs region, null otherwise.
+static hlfir::RegionAssignOp getOwningRegionAssign(mlir::Operation *op) {
+ auto assign = op->getParentOfType<hlfir::RegionAssignOp>();
+ if (!assign)
+ return nullptr;
+ if (assign.getRhsRegion().findAncestorOpInRegion(*op) ||
+ assign.getLhsRegion().findAncestorOpInRegion(*op))
+ return assign;
+ return nullptr;
+}
+
+/// Two fir.load are considered to evaluate to the same value when:
+/// * They load the same Fortran variable (same memref SSA value coming
+/// from a fir.declare/hlfir.declare defined outside \p root).
+/// * They sit in the rhs/lhs region of the same hlfir.region_assign, where
+/// no intervening write to the variable can occur.
+/// This relies on F2023 10.1.5, which states: "The evaluation of a function
+/// reference shall neither affect nor be affected by the evaluation of any
+/// other entity within the statement". Therefore any variable used in both
+/// the RHS and LHS, or several times on one side, cannot be modified by
+/// evaluation side effects.
+/// The restriction of the fir.declare/hlfir.declare being defined outside of
+/// the root is to ensure this is not an inner variable from a function that
+/// could have been inlined and for which the rule does not apply.
+static bool
+areLoadsKnownEquivalent(fir::LoadOp load1, fir::LoadOp load2,
+ hlfir::OrderedAssignmentTreeOpInterface root) {
+ if (!root)
+ return false;
+ if (load1.getMemref() != load2.getMemref())
+ return false;
+ auto variableOp =
+ load1.getMemref().getDefiningOp<fir::FortranVariableOpInterface>();
+ if (!variableOp || root->isAncestor(variableOp.getOperation()))
+ return false;
+ hlfir::RegionAssignOp assign1 = getOwningRegionAssign(load1);
+ return assign1 && assign1 == getOwningRegionAssign(load2);
+}
+
+/// Tell whether two SSA values are guaranteed to evaluate to the same value
+/// at runtime. This is useful to compare two SSA values used as array indices
+/// in the RHS and LHS in order to prove that the array sections on the LHS
+/// and RHS are identical.
+/// Loop-invariant code motion and CSE cannot always be used before this pass
+/// because index variable references may not be safe to hoist out of masked
+/// evaluations (the variable could be optional, out of bounds, or modified by
+/// previous assignment statements in the same construct).
+bool Scheduler::haveTheSameValue(mlir::Value v1, mlir::Value v2) {
+ if (v1 == v2)
+ return true;
+ if (!v1 || !v2)
+ return false;
+ mlir::Operation *op1 = v1.getDefiningOp();
+ mlir::Operation *op2 = v2.getDefiningOp();
+ if (!op1 || !op2)
+ return false;
+ // Special-case fir.load whose memref is a Fortran variable defined outside
+ // the tree root (loads cannot be matched structurally because they have
+ // memory effects). Could be extended to recurse on hlfir.designate operands
+ // to cover patterns like x(:, vec(j)) = x(:, vec(j)).
+ if (auto load1 = mlir::dyn_cast<fir::LoadOp>(op1))
+ if (auto load2 = mlir::dyn_cast<fir::LoadOp>(op2))
+ if (areLoadsKnownEquivalent(load1, load2, root))
+ return true;
+ if (!mlir::isMemoryEffectFree(op1) || !mlir::isMemoryEffectFree(op2))
+ return false;
+ // Otherwise, structural equivalence of pure ops, recursing through operands.
+ return mlir::OperationEquivalence::isEquivalentTo(
+ op1, op2,
+ [this](mlir::Value a, mlir::Value b) {
+ return mlir::success(haveTheSameValue(a, b));
+ },
+ /*markEquivalent=*/nullptr,
+ mlir::OperationEquivalence::Flags::IgnoreLocations);
+}
+
void Scheduler::startSchedulingAssignment(hlfir::RegionAssignOp assign,
bool leafRegionsMayOnlyRead) {
gatherAssignEffects(assign, leafRegionsMayOnlyRead, assignEffects);
@@ -654,7 +750,10 @@ void Scheduler::saveEvaluationIfConflict(mlir::Region &yieldRegion,
<< "\n";);
saveEvaluation(yieldRegion, effects, /*anyWrite=*/true);
} else {
- ConflictKind conflictKind = conflict(effects, assignEffects);
+ auto sameValue = [&](mlir::Value v1, mlir::Value v2) {
+ return haveTheSameValue(v1, v2);
+ };
+ ConflictKind conflictKind = conflict(effects, assignEffects, sameValue);
if (conflictKind.isAny()) {
// Region that conflicts with the current assignments must be fully
// evaluated and saved before doing the assignment (Note that it may
@@ -667,7 +766,7 @@ void Scheduler::saveEvaluationIfConflict(mlir::Region &yieldRegion,
pendingAlignedRegions.push_back(&yieldRegion);
if (evaluationsMayConflict &&
- !conflict(effects, assignEvaluateEffects).isNone()) {
+ !conflict(effects, assignEvaluateEffects, sameValue).isNone()) {
// If evaluations of the assignment may conflict with the yield
// evaluations, we have to save yield evaluation.
// For example, a WHERE mask might be written by the masked assignment
@@ -868,7 +967,7 @@ hlfir::buildEvaluationSchedule(hlfir::OrderedAssignmentTreeOpInterface root,
mlir::isa<hlfir::ForallOp>(root.getOperation());
// Loop through the assignments and schedule them.
- Scheduler scheduler(tryFusingAssignments);
+ Scheduler scheduler(root, tryFusingAssignments);
llvm::SmallVector<hlfir::RegionAssignOp> assignments;
gatherAssignments(root, assignments);
for (hlfir::RegionAssignOp assign : assignments) {
diff --git a/flang/test/HLFIR/order_assignments/where-equivalent-subscripts.fir b/flang/test/HLFIR/order_assignments/where-equivalent-subscripts.fir
new file mode 100644
index 0000000000000..0d1cd7bf64ece
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/where-equivalent-subscripts.fir
@@ -0,0 +1,143 @@
+// Test scheduling of WHERE assignments where the LHS and RHS subscript
+// indices are loads of the same scalar variable that is declared outside
+// the WHERE construct. The two loads have different SSA values (CSE cannot
+// merge them across the LHS and RHS regions), but the scheduling analysis
+// recognizes them as equivalent values via the ArraySectionAnalyzer
+// ValueEquivalenceCallback, so the conflict between the LHS and RHS array
+// sections is "aligned" and no temporary needs to be saved for the RHS.
+//
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments \
+// RUN: --debug-only=flang-ordered-assignment \
+// RUN: -flang-dbg-order-assignment-schedule-only 2>&1 | FileCheck %s
+//
+// REQUIRES: asserts
+
+// Positive case: same scalar `i` loaded in the LHS and RHS yield regions of
+// the same hlfir.region_assign. The hlfir.declare for `i` is outside the
+// hlfir.where, so the analysis can prove that the two fir.load operations
+// produce the same value.
+func.func @test_same_index(%arg_var: !fir.box<!fir.array<?x?xf32>>,
+ %arg_i: !fir.ref<i32>,
+ %arg_mask: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %dscope = fir.dummy_scope : !fir.dscope
+ %var:2 = hlfir.declare %arg_var dummy_scope %dscope {uniq_name = "_QFtest_same_indexEvar"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+ %i:2 = hlfir.declare %arg_i dummy_scope %dscope {uniq_name = "_QFtest_same_indexEi"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %mask:2 = hlfir.declare %arg_mask dummy_scope %dscope {uniq_name = "_QFtest_same_indexEmask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+ hlfir.where {
+ hlfir.yield %mask#0 : !fir.box<!fir.array<?x!fir.logical<4>>>
+ } do {
+ hlfir.region_assign {
+ %dims:3 = fir.box_dims %var#1, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+ %i_val = fir.load %i#0 : !fir.ref<i32>
+ %i_idx = fir.convert %i_val : (i32) -> i64
+ %slice = hlfir.designate %var#0 (%c1:%dims#1:%c1, %i_idx) shape %shape : (!fir.box<!fir.array<?x?xf32>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %slice : !fir.box<!fir.array<?xf32>>
+ } to {
+ %dims:3 = fir.box_dims %var#1, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+ %i_val = fir.load %i#0 : !fir.ref<i32>
+ %i_idx = fir.convert %i_val : (i32) -> i64
+ %slice = hlfir.designate %var#0 (%c1:%dims#1:%c1, %i_idx) shape %shape : (!fir.box<!fir.array<?x?xf32>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %slice : !fir.box<!fir.array<?xf32>>
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: scheduling where in test_same_index ------------
+// CHECK: conflict (aligned)
+// CHECK: run 1 evaluate: where/region_assign1
+// CHECK-NOT: save
+// CHECK-NOT: run {{[2-9]}}
+
+// Positive case with an arith expression on the index. The recursion through
+// mlir::OperationEquivalence::isEquivalentTo matches the two arith.addi ops
+// (one in the LHS region, one in the RHS region), then their %i_val operands
+// via the ValueEquivalenceCallback's load-equivalence rule.
+func.func @test_same_index_expr(%arg_var: !fir.box<!fir.array<?x?xf32>>,
+ %arg_i: !fir.ref<i32>,
+ %arg_mask: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c1_i32 = arith.constant 1 : i32
+ %dscope = fir.dummy_scope : !fir.dscope
+ %var:2 = hlfir.declare %arg_var dummy_scope %dscope {uniq_name = "_QFtest_same_index_exprEvar"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+ %i:2 = hlfir.declare %arg_i dummy_scope %dscope {uniq_name = "_QFtest_same_index_exprEi"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %mask:2 = hlfir.declare %arg_mask dummy_scope %dscope {uniq_name = "_QFtest_same_index_exprEmask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+ hlfir.where {
+ hlfir.yield %mask#0 : !fir.box<!fir.array<?x!fir.logical<4>>>
+ } do {
+ hlfir.region_assign {
+ %dims:3 = fir.box_dims %var#1, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+ %i_val = fir.load %i#0 : !fir.ref<i32>
+ %i_plus = arith.addi %i_val, %c1_i32 : i32
+ %i_idx = fir.convert %i_plus : (i32) -> i64
+ %slice = hlfir.designate %var#0 (%c1:%dims#1:%c1, %i_idx) shape %shape : (!fir.box<!fir.array<?x?xf32>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %slice : !fir.box<!fir.array<?xf32>>
+ } to {
+ %dims:3 = fir.box_dims %var#1, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+ %i_val = fir.load %i#0 : !fir.ref<i32>
+ %i_plus = arith.addi %i_val, %c1_i32 : i32
+ %i_idx = fir.convert %i_plus : (i32) -> i64
+ %slice = hlfir.designate %var#0 (%c1:%dims#1:%c1, %i_idx) shape %shape : (!fir.box<!fir.array<?x?xf32>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %slice : !fir.box<!fir.array<?xf32>>
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: scheduling where in test_same_index_expr ------------
+// CHECK: conflict (aligned)
+// CHECK: run 1 evaluate: where/region_assign1
+// CHECK-NOT: save
+// CHECK-NOT: run {{[2-9]}}
+
+// Negative case: the triplets actually differ between the LHS and the RHS
+// (the RHS reads from a shifted lower bound). The ValueEquivalenceCallback
+// can match the loads of `i`, but the section triplets are not identical
+// (different `lb`), so the analysis still has to fall back to "Unknown"
+// overlap, which forces saving the RHS to a temporary. This confirms the
+// callback only relaxes equality, never strengthens it.
+func.func @test_disjoint_section(%arg_var: !fir.box<!fir.array<?x?xf32>>,
+ %arg_i: !fir.ref<i32>,
+ %arg_mask: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %dscope = fir.dummy_scope : !fir.dscope
+ %var:2 = hlfir.declare %arg_var dummy_scope %dscope {uniq_name = "_QFtest_disjoint_sectionEvar"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+ %i:2 = hlfir.declare %arg_i dummy_scope %dscope {uniq_name = "_QFtest_disjoint_sectionEi"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %mask:2 = hlfir.declare %arg_mask dummy_scope %dscope {uniq_name = "_QFtest_disjoint_sectionEmask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.dscope) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+ hlfir.where {
+ hlfir.yield %mask#0 : !fir.box<!fir.array<?x!fir.logical<4>>>
+ } do {
+ hlfir.region_assign {
+ %dims:3 = fir.box_dims %var#1, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+ %i_val = fir.load %i#0 : !fir.ref<i32>
+ %i_idx = fir.convert %i_val : (i32) -> i64
+ // RHS section: var(2:dims, i) - lower bound differs from LHS.
+ %slice = hlfir.designate %var#0 (%c2:%dims#1:%c1, %i_idx) shape %shape : (!fir.box<!fir.array<?x?xf32>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %slice : !fir.box<!fir.array<?xf32>>
+ } to {
+ %dims:3 = fir.box_dims %var#1, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
+ %i_val = fir.load %i#0 : !fir.ref<i32>
+ %i_idx = fir.convert %i_val : (i32) -> i64
+ // LHS section: var(1:dims, i).
+ %slice = hlfir.designate %var#0 (%c1:%dims#1:%c1, %i_idx) shape %shape : (!fir.box<!fir.array<?x?xf32>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %slice : !fir.box<!fir.array<?xf32>>
+ }
+ }
+ return
+}
+
+// CHECK-LABEL: scheduling where in test_disjoint_section ------------
+// CHECK: conflicting arrays:
+// CHECK: run {{[0-9]+}} save {{.*}}: where/region_assign1/rhs
+// CHECK: run {{[0-9]+}} evaluate: where/region_assign1
More information about the flang-commits
mailing list