[flang-commits] [flang] 54aa928 - [flang] Undo the effects of CSE for hlfir.exactly_once. (#140190)
via flang-commits
flang-commits at lists.llvm.org
Tue May 20 09:22:09 PDT 2025
Author: Slava Zakharin
Date: 2025-05-20T09:22:05-07:00
New Revision: 54aa9282edb5a3abe625893a63018bb75dc5c541
URL: https://github.com/llvm/llvm-project/commit/54aa9282edb5a3abe625893a63018bb75dc5c541
DIFF: https://github.com/llvm/llvm-project/commit/54aa9282edb5a3abe625893a63018bb75dc5c541.diff
LOG: [flang] Undo the effects of CSE for hlfir.exactly_once. (#140190)
CSE may delete operations from hlfir.exactly_once and reuse
the equivalent results from the parent region(s), e.g. from the parent
hlfir.region_assign. This makes it problematic to clone
hlfir.exactly_once before the top-level hlfir.where.
This patch adds a "canonicalizer" that pulls in such operations
back into hlfir.exactly_once.
Added:
flang/test/HLFIR/order_assignments/where-after-cse.fir
Modified:
flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 5cae7cf443c86..89b5ccb7d850e 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -24,12 +24,15 @@
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/Analysis/Liveness.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
+#include <unordered_set>
namespace hlfir {
#define GEN_PASS_DEF_LOWERHLFIRORDEREDASSIGNMENTS
@@ -263,6 +266,19 @@ class OrderedAssignmentRewriter {
return &inserted.first->second;
}
+ /// Given a top-level hlfir.where, look for hlfir.exactly_once operations
+ /// inside it and see if any of the values live into hlfir.exactly_once
+ /// do not dominate hlfir.where. This may happen due to CSE reusing
+ /// results of operations from the region parent to hlfir.exactly_once.
+ /// Since we are going to clone the body of hlfir.exactly_once before
+ /// the top-level hlfir.where, such def-use will cause problems.
+ /// There are options how to resolve this in a different way,
+ /// e.g. making hlfir.exactly_once IsolatedFromAbove or making
+ /// it a region of hlfir.where and wiring the result(s) through
+ /// the block arguments. For the time being, this canonicalization
+ /// tries to undo the effects of CSE.
+ void canonicalizeExactlyOnceInsideWhere(hlfir::WhereOp whereOp);
+
fir::FirOpBuilder &builder;
/// Map containing the mapping between the original order assignment tree
@@ -523,6 +539,10 @@ void OrderedAssignmentRewriter::generateMaskIfOp(mlir::Value cdt) {
void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
mlir::Location loc = whereOp.getLoc();
if (!whereLoopNest) {
+ // Make sure liveness information is valid for the inner hlfir.exactly_once
+ // operations, and their bodies can be cloned before the top-level
+ // hlfir.where.
+ canonicalizeExactlyOnceInsideWhere(whereOp);
// This is the top-level WHERE. Start a loop nest iterating on the shape of
// the where mask.
if (auto maybeSaved = getIfSaved(whereOp.getMaskRegion())) {
@@ -1350,6 +1370,105 @@ void OrderedAssignmentRewriter::saveLeftHandSide(
}
}
+/// Pull back into every hlfir.exactly_once nested in the top-level \p whereOp
+/// the operations whose results it uses but that do not dominate \p whereOp
+/// (e.g. because CSE replaced an inner operation with an equivalent one from
+/// the parent region). Afterwards the body of each hlfir.exactly_once only
+/// depends on values that properly dominate \p whereOp, so it can be cloned
+/// before \p whereOp.
+void OrderedAssignmentRewriter::canonicalizeExactlyOnceInsideWhere(
+    hlfir::WhereOp whereOp) {
+  // Return the operation defining \p v, asserting that it is something that
+  // can be cloned into hlfir.exactly_once: it must exist (i.e. \p v is not a
+  // block argument), have no regions and have no side effects.
+  // Returns nullptr when \p v has no defining operation.
+  auto getDefinition = [](mlir::Value v) -> mlir::Operation * {
+    mlir::Operation *op = v.getDefiningOp();
+    if (!op) {
+      LLVM_DEBUG(
+          llvm::dbgs()
+          << "Value live into hlfir.exactly_once has no defining operation: "
+          << v << "\n");
+      // Stop here: the remaining checks dereference op.
+      assert(false && "invalid live-in");
+      return nullptr;
+    }
+    bool isValid = true;
+    if (op->getNumRegions() != 0) {
+      LLVM_DEBUG(
+          llvm::dbgs()
+          << "Cannot pull an operation with regions into hlfir.exactly_once"
+          << *op << "\n");
+      isValid = false;
+    }
+    auto effects = mlir::getEffectsRecursively(op);
+    if (!effects || !effects->empty()) {
+      LLVM_DEBUG(llvm::dbgs() << "Side effects on operation with result live "
+                                 "into hlfir.exactly_once"
+                              << *op << "\n");
+      isValid = false;
+    }
+    assert(isValid && "invalid live-in");
+    // isValid is only read by the assert above.
+    (void)isValid;
+    return op;
+  };
+  mlir::Liveness liveness(whereOp.getOperation());
+  whereOp->walk([&](hlfir::ExactlyOnceOp op) {
+    std::unordered_set<mlir::Operation *> liveInSet;
+    LLVM_DEBUG(llvm::dbgs() << "Canonicalizing:\n" << op << "\n");
+    auto &liveIns = liveness.getLiveIn(&op.getBody().front());
+    if (liveIns.empty())
+      return;
+    // Collect the definitions of the live-in values that do not dominate
+    // the top-level where. Note that the liveIns set is not ordered.
+    for (mlir::Value liveIn : liveIns) {
+      if (dominanceInfo.properlyDominates(liveIn, whereOp))
+        continue;
+      LLVM_DEBUG(llvm::dbgs()
+                 << "Does not dominate top-level where: " << liveIn << "\n");
+      mlir::Operation *def = getDefinition(liveIn);
+      // Nothing has been modified yet: leave this hlfir.exactly_once as is
+      // rather than working with a null operation.
+      if (!def)
+        return;
+      liveInSet.insert(def);
+    }
+
+    // Populate the set of operations that we need to pull into
+    // hlfir.exactly_once, so that the only live-ins left are the ones
+    // that dominate whereOp.
+    std::unordered_set<mlir::Operation *> cloneSet(liveInSet);
+    llvm::SmallVector<mlir::Operation *> workList(cloneSet.begin(),
+                                                  cloneSet.end());
+    while (!workList.empty()) {
+      mlir::Operation *current = workList.pop_back_val();
+      for (mlir::Value operand : current->getOperands()) {
+        if (dominanceInfo.properlyDominates(operand, whereOp))
+          continue;
+        mlir::Operation *def = getDefinition(operand);
+        if (!def)
+          return;
+        // Visit each definition only once.
+        if (cloneSet.insert(def).second)
+          workList.push_back(def);
+      }
+    }
+
+    // Sort the operations by dominance. This preserves their order
+    // after the cloning, and also guarantees stable IR generation.
+    llvm::SmallVector<mlir::Operation *> cloneList(cloneSet.begin(),
+                                                   cloneSet.end());
+    llvm::sort(cloneList, [&](mlir::Operation *L, mlir::Operation *R) {
+      return dominanceInfo.properlyDominates(L, R);
+    });
+
+    // Clone the operations at the start of the hlfir.exactly_once body.
+    // The mapper rewires the operands of later clones to earlier clones.
+    mlir::IRMapping mapper;
+    mlir::Operation::CloneOptions options;
+    options.cloneOperands();
+    mlir::OpBuilder::InsertionGuard guard(builder);
+    builder.setInsertionPointToStart(&op.getBody().front());
+
+    for (auto *toClone : cloneList) {
+      LLVM_DEBUG(llvm::dbgs() << "Cloning: " << *toClone << "\n");
+      builder.insert(toClone->clone(mapper, options));
+    }
+    // Redirect the uses inside hlfir.exactly_once to the clones. Uses
+    // outside of its body keep referring to the original operations.
+    for (mlir::Operation *oldOp : liveInSet)
+      for (mlir::Value oldVal : oldOp->getResults()) {
+        // lookupOrNull() lets the diagnostic below be reached:
+        // lookup() asserts on its own when the mapping is missing.
+        mlir::Value newVal = mapper.lookupOrNull(oldVal);
+        if (!newVal) {
+          LLVM_DEBUG(llvm::dbgs() << "No clone found for: " << oldVal << "\n");
+          assert(false && "missing clone");
+          // Never replace uses with a null value.
+          continue;
+        }
+        mlir::replaceAllUsesInRegionWith(oldVal, newVal, op.getBody());
+      }
+
+    LLVM_DEBUG(llvm::dbgs() << "Finished canonicalization\n");
+    if (!liveInSet.empty())
+      LLVM_DEBUG(llvm::dbgs() << op << "\n");
+  });
+}
+
/// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given
/// a schedule.
static void lower(hlfir::OrderedAssignmentTreeOpInterface root,
diff --git a/flang/test/HLFIR/order_assignments/where-after-cse.fir b/flang/test/HLFIR/order_assignments/where-after-cse.fir
new file mode 100644
index 0000000000000..4505c879c7b0f
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/where-after-cse.fir
@@ -0,0 +1,254 @@
+// Test canonicalization of hlfir.exactly_once operations
+// after CSE. The live-in values that are not dominating
+// the top-level hlfir.where must be cloned inside hlfir.exactly_once,
+// otherwise, the cloning of the hlfir.exactly_once before hlfir.where
+// would cause def-use issues:
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+// Simple case: after CSE, the only value live into hlfir.exactly_once that does not dominate hlfir.where is the hlfir.designate of "p1" (%2 of the parent hlfir.region_assign).
+// CHECK-LABEL: func.func @_QPtest1(
+func.func @_QPtest1(%arg0: !fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>> {fir.bindc_name = "x"}) {
+ %true = arith.constant true
+ %cst = arith.constant 0.000000e+00 : f32
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFtest1Ex"} : (!fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
+ hlfir.where { // Mask region: elements of the "p2" section compared (ogt) against %cst.
+ %2 = hlfir.designate %1#0{"p2"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+ %3 = fir.load %2 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+ %4:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> (index, index, index)
+ %5 = arith.addi %4#0, %4#1 : index
+ %6 = arith.subi %5, %c1 : index
+ %7 = arith.subi %6, %4#0 : index
+ %8 = arith.addi %7, %c1 : index
+ %9 = arith.cmpi sgt, %8, %c0 : index
+ %10 = arith.select %9, %8, %c0 : index
+ %11 = fir.shape %10 : (index) -> !fir.shape<1>
+ %12 = hlfir.designate %3 (%4#0:%6:%c1) shape %11 : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ %13 = hlfir.elemental %11 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+ ^bb0(%arg1: index):
+ %14 = hlfir.designate %12 (%arg1) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %15 = fir.load %14 : !fir.ref<f32>
+ %16 = arith.cmpf ogt, %15, %cst fastmath<contract> : f32
+ %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+ hlfir.yield_element %17 : !fir.logical<4>
+ }
+ hlfir.yield %13 : !hlfir.expr<?x!fir.logical<4>> cleanup {
+ hlfir.destroy %13 : !hlfir.expr<?x!fir.logical<4>>
+ }
+ } do { // Body of the where: a single masked assignment.
+ hlfir.region_assign { // RHS region: %2 defined here is reused inside the hlfir.exactly_once below.
+ %2 = hlfir.designate %1#0{"p1"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+ %3 = fir.load %2 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+ %4:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+ %5 = arith.addi %4#0, %4#1 : index
+ %6 = arith.subi %5, %c1 : index
+ %7 = arith.subi %6, %4#0 : index
+ %8 = arith.addi %7, %c1 : index
+ %9 = arith.cmpi sgt, %8, %c0 : index
+ %10 = arith.select %9, %8, %c0 : index
+ %11 = fir.shape %10 : (index) -> !fir.shape<1>
+ %12 = hlfir.designate %3 (%4#0:%6:%c1, %c1) shape %11 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ %13 = hlfir.exactly_once : !hlfir.expr<?xf32> { // %2 is the only value used in this body that is defined inside hlfir.where but outside this body.
+// CHECK: %[[VAL_26:.*]] = hlfir.designate %{{.*}}#0{"p1"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+// CHECK: fir.load %[[VAL_26]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+// CHECK: %[[VAL_47:.*]] = fir.call @_QPcallee(%{{.*}}) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> !fir.array<?xf32>
+// CHECK: fir.do_loop
+ %15 = fir.load %2 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> // %2 comes from the enclosing hlfir.region_assign.
+ %16:3 = fir.box_dims %15, %c0 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+ %17 = arith.addi %16#0, %16#1 : index
+ %18 = arith.subi %17, %c1 : index
+ %19 = arith.subi %18, %16#0 : index
+ %20 = arith.addi %19, %c1 : index
+ %21 = arith.cmpi sgt, %20, %c0 : index
+ %22 = arith.select %21, %20, %c0 : index
+ %23 = fir.shape %22 : (index) -> !fir.shape<1>
+ %24 = hlfir.designate %15 (%16#0:%18:%c1, %c1) shape %23 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ %25:2 = hlfir.declare %24 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QMmy_moduleFcalleeEx"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+ %26:3 = fir.box_dims %25#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+ %27 = fir.convert %26#1 : (index) -> i64
+ %28 = fir.convert %27 : (i64) -> index
+ %29 = arith.cmpi sgt, %28, %c0 : index
+ %30 = arith.select %29, %28, %c0 : index
+ %31 = fir.shape %30 : (index) -> !fir.shape<1>
+ %32 = fir.allocmem !fir.array<?xf32>, %30 {bindc_name = ".tmp.expr_result", uniq_name = ""}
+ %33 = fir.convert %32 : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+ %34:2 = hlfir.declare %33(%31) {uniq_name = ".tmp.expr_result"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+ %35 = fir.call @_QPcallee(%24) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> !fir.array<?xf32>
+ fir.save_result %35 to %34#1(%31) : !fir.array<?xf32>, !fir.ref<!fir.array<?xf32>>, !fir.shape<1>
+ %36 = hlfir.as_expr %34#0 move %true : (!fir.box<!fir.array<?xf32>>, i1) -> !hlfir.expr<?xf32>
+ hlfir.yield %36 : !hlfir.expr<?xf32> cleanup {
+ hlfir.destroy %36 : !hlfir.expr<?xf32>
+ }
+ }
+ %14 = hlfir.elemental %11 unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+ ^bb0(%arg1: index):
+ %15 = hlfir.designate %12 (%arg1) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %16 = hlfir.apply %13, %arg1 : (!hlfir.expr<?xf32>, index) -> f32
+ %17 = fir.load %15 : !fir.ref<f32>
+ %18 = arith.divf %17, %16 fastmath<contract> : f32
+ hlfir.yield_element %18 : f32
+ }
+ hlfir.yield %14 : !hlfir.expr<?xf32> cleanup {
+ hlfir.destroy %14 : !hlfir.expr<?xf32>
+ }
+ } to { // LHS region: yields the "p2" section that is assigned to.
+ %2 = hlfir.designate %1#0{"p2"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMmy_moduleTtt{p1:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>,p2:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+ %3 = fir.load %2 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+ %4:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> (index, index, index)
+ %5 = arith.addi %4#0, %4#1 : index
+ %6 = arith.subi %5, %c1 : index
+ %7 = arith.subi %6, %4#0 : index
+ %8 = arith.addi %7, %c1 : index
+ %9 = arith.cmpi sgt, %8, %c0 : index
+ %10 = arith.select %9, %8, %c0 : index
+ %11 = fir.shape %10 : (index) -> !fir.shape<1>
+ %12 = hlfir.designate %3 (%4#0:%6:%c1) shape %11 : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %12 : !fir.box<!fir.array<?xf32>>
+ }
+ }
+ return
+}
+
+// CSE makes a chain of operations live-in: the first hlfir.exactly_once uses %6 and %11 of the parent hlfir.region_assign, and %11 in turn depends on %7..%10 defined there.
+// CHECK-LABEL: func.func @_QPtest_where_in_forall(
+func.func @_QPtest_where_in_forall(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "b"}, %arg2: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "c"}) {
+ %false = arith.constant false
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c2_i32 = arith.constant 2 : i32
+ %c100 = arith.constant 100 : index
+ %0 = fir.alloca !fir.array<100x!fir.logical<4>> {bindc_name = ".tmp.expr_result"}
+ %1 = fir.alloca !fir.array<100x!fir.logical<4>> {bindc_name = ".tmp.expr_result"}
+ %2 = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFtest_where_in_forallEb"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+ %3:2 = hlfir.declare %arg0 dummy_scope %2 {uniq_name = "_QFtest_where_in_forallEa"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+ %4:2 = hlfir.declare %arg1 dummy_scope %2 {uniq_name = "_QFtest_where_in_forallEb"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+ %5:2 = hlfir.declare %arg2 dummy_scope %2 {uniq_name = "_QFtest_where_in_forallEc"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+ hlfir.forall lb { // Bounds are the constants %c1_i32 and %c10_i32; %arg3 is the forall index.
+ hlfir.yield %c1_i32 : i32
+ } ub {
+ hlfir.yield %c10_i32 : i32
+ } (%arg3: i32) {
+ hlfir.where { // Top-level where; the mask is the result of @_QPpure_logical_func1.
+ %6 = fir.shape %c100 : (index) -> !fir.shape<1>
+ %7:2 = hlfir.declare %0(%6) {uniq_name = ".tmp.expr_result"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+ %8 = fir.call @_QPpure_logical_func1() proc_attrs<pure> fastmath<contract> : () -> !fir.array<100x!fir.logical<4>>
+ fir.save_result %8 to %7#1(%6) : !fir.array<100x!fir.logical<4>>, !fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>
+ %9 = hlfir.as_expr %7#0 move %false : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i1) -> !hlfir.expr<100x!fir.logical<4>>
+ hlfir.yield %9 : !hlfir.expr<100x!fir.logical<4>> cleanup {
+ hlfir.destroy %9 : !hlfir.expr<100x!fir.logical<4>>
+ }
+ } do {
+ hlfir.region_assign { // RHS region: %6 and %7..%11 below feed the hlfir.exactly_once.
+ %6 = fir.convert %arg3 : (i32) -> i64 // Used by %11 and inside the hlfir.exactly_once body.
+// CHECK: %[[VAL_58:.*]]:3 = fir.box_dims %[[VAL_21]]#1, %{{.*}} : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+// CHECK: %[[VAL_59:.*]] = arith.cmpi sgt, %[[VAL_58]]#1, %{{.*}} : index
+// CHECK: %[[VAL_60:.*]] = arith.select %[[VAL_59]], %[[VAL_58]]#1, %{{.*}} : index
+// CHECK: %[[VAL_61:.*]] = fir.shape %[[VAL_60]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_62:.*]] = hlfir.designate %[[VAL_21]]#0 (%{{.*}}, %{{.*}}:%[[VAL_58]]#1:%{{.*}}) shape %[[VAL_61]] : (!fir.box<!fir.array<?x?xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ %7:3 = fir.box_dims %4#1, %c1 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %8 = arith.cmpi sgt, %7#1, %c0 : index
+ %9 = arith.select %8, %7#1, %c0 : index
+ %10 = fir.shape %9 : (index) -> !fir.shape<1>
+ %11 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1) shape %10 : (!fir.box<!fir.array<?x?xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ %12 = hlfir.exactly_once : f32 { // Uses %6 and %11, which are defined inside hlfir.where but outside this body.
+ %19:3 = fir.box_dims %3#1, %c1 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %20 = arith.cmpi sgt, %19#1, %c0 : index
+ %21 = arith.select %20, %19#1, %c0 : index
+ %22 = fir.shape %21 : (index) -> !fir.shape<1>
+ %23 = hlfir.designate %3#0 (%6, %c1:%19#1:%c1) shape %22 : (!fir.box<!fir.array<?x?xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[VAL_68:.*]] = fir.call @_QPpure_real_func2() fastmath<contract> : () -> f32
+// CHECK: %[[VAL_69:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+// CHECK: ^bb0(%[[VAL_70:.*]]: index):
+// CHECK: %[[VAL_72:.*]] = hlfir.designate %[[VAL_62]] (%[[VAL_70]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %24 = fir.call @_QPpure_real_func2() fastmath<contract> : () -> f32
+ %25 = hlfir.elemental %22 unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+ ^bb0(%arg4: index):
+ %28 = hlfir.designate %23 (%arg4) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %29 = hlfir.designate %11 (%arg4) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %30 = fir.load %28 : !fir.ref<f32>
+ %31 = fir.load %29 : !fir.ref<f32>
+ %32 = arith.addf %30, %31 fastmath<contract> : f32
+ %33 = arith.addf %32, %24 fastmath<contract> : f32
+ hlfir.yield_element %33 : f32
+ }
+ %26:3 = hlfir.associate %25(%22) {adapt.valuebyref} : (!hlfir.expr<?xf32>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>, i1)
+ %27 = fir.call @_QPpure_real_func(%26#1) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>) -> f32
+ hlfir.yield %27 : f32 cleanup {
+ hlfir.end_associate %26#1, %26#2 : !fir.ref<!fir.array<?xf32>>, i1
+ hlfir.destroy %25 : !hlfir.expr<?xf32>
+ }
+ }
+ %13:3 = fir.box_dims %3#1, %c1 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %14 = arith.cmpi sgt, %13#1, %c0 : index
+ %15 = arith.select %14, %13#1, %c0 : index
+ %16 = fir.shape %15 : (index) -> !fir.shape<1>
+ %17 = hlfir.designate %3#0 (%6, %c1:%13#1:%c1) shape %16 : (!fir.box<!fir.array<?x?xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ %18 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
+ ^bb0(%arg4: index):
+ %19 = hlfir.designate %11 (%arg4) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %20 = fir.load %19 : !fir.ref<f32>
+ %21 = arith.addf %20, %12 fastmath<contract> : f32
+ %22 = hlfir.designate %17 (%arg4) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %23 = fir.call @_QPpure_elem_func(%22) proc_attrs<elemental, pure> fastmath<contract> : (!fir.ref<f32>) -> f32
+ %24 = arith.addf %21, %23 fastmath<contract> : f32
+ hlfir.yield_element %24 : f32
+ }
+ hlfir.yield %18 : !hlfir.expr<?xf32> cleanup {
+ hlfir.destroy %18 : !hlfir.expr<?xf32>
+ }
+ } to { // LHS region of the first assignment.
+ %6 = arith.muli %arg3, %c2_i32 overflow<nsw> : i32
+ %7 = fir.convert %6 : (i32) -> i64
+ %8:3 = fir.box_dims %3#1, %c1 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %9 = arith.cmpi sgt, %8#1, %c0 : index
+ %10 = arith.select %9, %8#1, %c0 : index
+ %11 = fir.shape %10 : (index) -> !fir.shape<1>
+ %12 = hlfir.designate %3#0 (%7, %c1:%8#1:%c1) shape %11 : (!fir.box<!fir.array<?x?xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %12 : !fir.box<!fir.array<?xf32>>
+ }
+ hlfir.elsewhere mask { // The hlfir.exactly_once below only uses values defined before hlfir.forall.
+ %6 = hlfir.exactly_once : !hlfir.expr<100x!fir.logical<4>> {
+ %7 = fir.shape %c100 : (index) -> !fir.shape<1>
+ %8:2 = hlfir.declare %1(%7) {uniq_name = ".tmp.expr_result"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+ %9 = fir.call @_QPpure_logical_func2() proc_attrs<pure> fastmath<contract> : () -> !fir.array<100x!fir.logical<4>>
+ fir.save_result %9 to %8#1(%7) : !fir.array<100x!fir.logical<4>>, !fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>
+ %10 = hlfir.as_expr %8#0 move %false : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i1) -> !hlfir.expr<100x!fir.logical<4>>
+ hlfir.yield %10 : !hlfir.expr<100x!fir.logical<4>> cleanup {
+ hlfir.destroy %10 : !hlfir.expr<100x!fir.logical<4>>
+ }
+ }
+ hlfir.yield %6 : !hlfir.expr<100x!fir.logical<4>>
+ } do {
+ hlfir.region_assign {
+ %6 = fir.convert %arg3 : (i32) -> i64
+ %7:3 = fir.box_dims %5#1, %c1 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %8 = arith.cmpi sgt, %7#1, %c0 : index
+ %9 = arith.select %8, %7#1, %c0 : index
+ %10 = fir.shape %9 : (index) -> !fir.shape<1>
+ %11 = hlfir.designate %5#0 (%6, %c1:%7#1:%c1) shape %10 : (!fir.box<!fir.array<?x?xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %11 : !fir.box<!fir.array<?xf32>>
+ } to {
+ %6 = arith.muli %arg3, %c2_i32 overflow<nsw> : i32
+ %7 = fir.convert %6 : (i32) -> i64
+ %8 = hlfir.exactly_once : i32 { // The body uses no values defined outside of it.
+ %14 = fir.call @_QPpure_ifoo() proc_attrs<pure> fastmath<contract> : () -> i32
+ hlfir.yield %14 : i32 cleanup {
+ }
+ }
+ %9 = fir.convert %8 : (i32) -> index
+ %10 = arith.cmpi sgt, %9, %c0 : index
+ %11 = arith.select %10, %9, %c0 : index
+ %12 = fir.shape %11 : (index) -> !fir.shape<1>
+ %13 = hlfir.designate %3#0 (%7, %c1:%9:%c1) shape %12 : (!fir.box<!fir.array<?x?xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.yield %13 : !fir.box<!fir.array<?xf32>>
+ }
+ }
+ }
+ }
+ return
+}
More information about the flang-commits
mailing list