[flang-commits] [flang] [flang] Enable loop-versioning for slices. (PR #120344)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Wed Dec 18 11:48:26 PST 2024
https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/120344
>From 0f6cb878b67d5b69afa669caaa501f6c544528df Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 17 Dec 2024 17:16:32 -0800
Subject: [PATCH] [flang] Enable loop-versioning for slices.
Loops resulting from array expressions like array(:,i)
may be versioned for the unit stride of the innermost dimension,
when the initial array is an assumed-shape array (which are contiguous
in many Fortran programs).
This speeds up facerec by about 12% due to further vectorization
of the innermost loop produced for the total SUM reduction.
---
.../Optimizer/Transforms/LoopVersioning.cpp | 116 ++++--
flang/test/Transforms/loop-versioning.fir | 353 ++++++++++++++++++
2 files changed, 442 insertions(+), 27 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
index adc39861840ab1..b534ec160ce215 100644
--- a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
+++ b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
@@ -145,11 +145,45 @@ struct ArgsUsageInLoop {
};
} // namespace
-static fir::SequenceType getAsSequenceType(mlir::Value *v) {
- mlir::Type argTy = fir::unwrapPassByRefType(fir::unwrapRefType(v->getType()));
+static fir::SequenceType getAsSequenceType(mlir::Value v) {
+ mlir::Type argTy = fir::unwrapPassByRefType(fir::unwrapRefType(v.getType()));
return mlir::dyn_cast<fir::SequenceType>(argTy);
}
+/// Return the rank and the element size (in bytes) of the given
+/// value \p v. If it is not an array or the element type is not
+/// supported, then return <0, 0>. Only trivial data types
+/// are currently supported.
+/// When \p isArgument is true, \p v is assumed to be a function
+/// argument. If \p v's type does not look like a type of an assumed
+/// shape array, then the function returns <0, 0>.
+/// When \p isArgument is false, array types with known innermost
+/// dimension are allowed to proceed.
+static std::pair<unsigned, size_t>
+getRankAndElementSize(const fir::KindMapping &kindMap,
+ const mlir::DataLayout &dl, mlir::Value v,
+ bool isArgument = false) {
+ if (auto seqTy = getAsSequenceType(v)) {
+ unsigned rank = seqTy.getDimension();
+ if (rank > 0 &&
+ (!isArgument ||
+ seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent())) {
+ size_t typeSize = 0;
+ mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(v.getType());
+ if (fir::isa_trivial(elementType)) {
+ auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash(
+ v.getLoc(), elementType, dl, kindMap);
+ typeSize = llvm::alignTo(eleSize, eleAlign);
+ }
+ if (typeSize)
+ return {rank, typeSize};
+ }
+ }
+
+ LLVM_DEBUG(llvm::dbgs() << "Unsupported rank/type: " << v << '\n');
+ return {0, 0};
+}
+
/// if a value comes from a fir.declare, follow it to the original source,
/// otherwise return the value
static mlir::Value unwrapFirDeclare(mlir::Value val) {
@@ -160,12 +194,48 @@ static mlir::Value unwrapFirDeclare(mlir::Value val) {
return val;
}
+/// Return true if the \p rebox operation keeps the input array
+/// contiguous in the innermost dimension, provided it is initially
+/// contiguous in the innermost dimension.
+static bool reboxPreservesContinuity(fir::ReboxOp rebox) {
+ // If slicing is not involved, then the rebox does not affect
+ // the contiguity of the array.
+ auto sliceArg = rebox.getSlice();
+ if (!sliceArg)
+ return true;
+
+ // A slice with step=1 in the innermost dimension preserves
+ // the contiguity of the array in the innermost dimension.
+ if (auto sliceOp =
+ mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp())) {
+ if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) {
+ auto triples = sliceOp.getTriples();
+ if (triples.size() > 2)
+ if (auto innermostStep = fir::getIntIfConstant(triples[2]))
+ if (*innermostStep == 1)
+ return true;
+ }
+
+ LLVM_DEBUG(llvm::dbgs()
+ << "REBOX with slicing may produce non-contiguous array: "
+ << sliceOp << '\n'
+ << rebox << '\n');
+ return false;
+ }
+
+ LLVM_DEBUG(llvm::dbgs() << "REBOX with unknown slice" << sliceArg << '\n'
+ << rebox << '\n');
+ return false;
+}
+
/// if a value comes from a fir.rebox, follow the rebox to the original source,
/// of the value, otherwise return the value
static mlir::Value unwrapReboxOp(mlir::Value val) {
- // don't support reboxes of reboxes
- if (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>())
+ while (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>()) {
+ if (!reboxPreservesContinuity(rebox))
+ break;
val = rebox.getBox();
+ }
return val;
}
@@ -257,25 +327,10 @@ void LoopVersioningPass::runOnOperation() {
continue;
}
- if (auto seqTy = getAsSequenceType(&arg)) {
- unsigned rank = seqTy.getDimension();
- if (rank > 0 &&
- seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent()) {
- size_t typeSize = 0;
- mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(arg.getType());
- if (mlir::isa<mlir::FloatType>(elementType) ||
- mlir::isa<mlir::IntegerType>(elementType) ||
- mlir::isa<mlir::ComplexType>(elementType)) {
- auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash(
- arg.getLoc(), elementType, *dl, kindMap);
- typeSize = llvm::alignTo(eleSize, eleAlign);
- }
- if (typeSize)
- argsOfInterest.push_back({arg, typeSize, rank, {}});
- else
- LLVM_DEBUG(llvm::dbgs() << "Type not supported\n");
- }
- }
+ auto [rank, typeSize] =
+ getRankAndElementSize(kindMap, *dl, arg, /*isArgument=*/true);
+ if (rank != 0 && typeSize != 0)
+ argsOfInterest.push_back({arg, typeSize, rank, {}});
}
if (argsOfInterest.empty()) {
@@ -326,6 +381,13 @@ void LoopVersioningPass::runOnOperation() {
if (arrayCoor.getSlice())
argsInLoop.cannotTransform.insert(a.arg);
+ // We need to compute the rank and element size
+ // based on the operand, not the original argument,
+ // because array slicing may affect it.
+ std::tie(a.rank, a.size) = getRankAndElementSize(kindMap, *dl, a.arg);
+ if (a.rank == 0 || a.size == 0)
+ argsInLoop.cannotTransform.insert(a.arg);
+
if (argsInLoop.cannotTransform.contains(a.arg)) {
// Remove any previously recorded usage, if any.
argsInLoop.usageInfo.erase(a.arg);
@@ -416,8 +478,8 @@ void LoopVersioningPass::runOnOperation() {
mlir::Location loc = builder.getUnknownLoc();
mlir::IndexType idxTy = builder.getIndexType();
- LLVM_DEBUG(llvm::dbgs() << "Module Before transformation:");
- LLVM_DEBUG(module->dump());
+ LLVM_DEBUG(llvm::dbgs() << "Func Before transformation:\n");
+ LLVM_DEBUG(func->dump());
LLVM_DEBUG(llvm::dbgs() << "loopsOfInterest: " << loopsOfInterest.size()
<< "\n");
@@ -551,8 +613,8 @@ void LoopVersioningPass::runOnOperation() {
}
}
- LLVM_DEBUG(llvm::dbgs() << "After transform:\n");
- LLVM_DEBUG(module->dump());
+ LLVM_DEBUG(llvm::dbgs() << "Func After transform:\n");
+ LLVM_DEBUG(func->dump());
LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
}
diff --git a/flang/test/Transforms/loop-versioning.fir b/flang/test/Transforms/loop-versioning.fir
index 7528d14b3670d5..2f7c439ed3f4e1 100644
--- a/flang/test/Transforms/loop-versioning.fir
+++ b/flang/test/Transforms/loop-versioning.fir
@@ -113,8 +113,10 @@ func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"},
// CHECK-LABEL: func.func @sum1dfixed(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf64>> {{.*}})
// CHECK: fir.do_loop {{.*}}
+// CHECK-NOT: fir.do_loop
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[ARG0]], {{.*}}
// CHECK: %{{.*}} = fir.load %[[COORD]]
+// CHECK-NOT: fir.do_loop
// -----
@@ -1641,4 +1643,355 @@ func.func @_QPtest_complex10(%arg0: !fir.box<!fir.array<?x?xcomplex<f80>>> {fir.
// CHECK: } else {
// CHECK: fir.do_loop
+// Test that the loop is not versioned with non-contiguous slices:
+//subroutine test_step2_slice(x, y)
+// real :: x(:,:), y(:,:)
+// do i=1,10
+// x(::2,i) = y(::2,i) + 1.0
+// end do
+//end subroutine
+func.func @_QPtest_step2_slice(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) {
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ %c2 = arith.constant 2 : index
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_step2_sliceEi"}
+ %2 = fir.declare %1 {uniq_name = "_QFtest_step2_sliceEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest_step2_sliceEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+ %4 = fir.rebox %3 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+ %5 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_step2_sliceEy"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+ %6 = fir.rebox %5 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+ %7 = fir.convert %c1 : (index) -> i32
+ %8:2 = fir.do_loop %arg2 = %c1 to %c10 step %c1 iter_args(%arg3 = %7) -> (index, i32) {
+ fir.store %arg3 to %2 : !fir.ref<i32>
+ %9:3 = fir.box_dims %6, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %10 = arith.addi %9#1, %c1 : index
+ %11 = arith.divsi %10, %c2 : index
+ %12 = arith.cmpi sgt, %11, %c0 : index
+ %13 = arith.select %12, %11, %c0 : index
+ %14 = fir.load %2 : !fir.ref<i32>
+ %15 = fir.convert %14 : (i32) -> i64
+ %16 = fir.undefined index
+ %17 = fir.slice %c1, %9#1, %c2, %15, %16, %16 : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %18 = fir.rebox %6 [%17] : (!fir.box<!fir.array<?x?xf32>>, !fir.slice<2>) -> !fir.box<!fir.array<?xf32>>
+ %19:3 = fir.box_dims %4, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %20 = fir.slice %c1, %19#1, %c2, %15, %16, %16 : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %21 = fir.rebox %4 [%20] : (!fir.box<!fir.array<?x?xf32>>, !fir.slice<2>) -> !fir.box<!fir.array<?xf32>>
+ fir.do_loop %arg4 = %c1 to %13 step %c1 unordered {
+ %25 = fir.array_coor %18 %arg4 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %26 = fir.load %25 : !fir.ref<f32>
+ %27 = arith.addf %26, %cst fastmath<fast> : f32
+ %28 = fir.array_coor %21 %arg4 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ fir.store %27 to %28 : !fir.ref<f32>
+ }
+ %22 = arith.addi %arg2, %c1 overflow<nsw> : index
+ %23 = fir.load %2 : !fir.ref<i32>
+ %24 = arith.addi %23, %7 overflow<nsw> : i32
+ fir.result %22, %24 : index, i32
+ }
+ fir.store %8#1 to %2 : !fir.ref<i32>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest_step2_slice(
+// CHECK-NOT: fir.if
+
+// Test that the loop is versioned with most probably
+// contiguous slices:
+//subroutine test_step1_slice(x, y)
+// real :: x(:,:), y(:,:)
+// do i=1,10
+// x(:,i) = y(:,i) + 1.0
+// end do
+//end subroutine
+func.func @_QPtest_step1_slice(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) {
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_step1_sliceEi"}
+ %2 = fir.declare %1 {uniq_name = "_QFtest_step1_sliceEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest_step1_sliceEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+ %4 = fir.rebox %3 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+ %5 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_step1_sliceEy"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+ %6 = fir.rebox %5 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+ %7 = fir.convert %c1 : (index) -> i32
+ %8:2 = fir.do_loop %arg2 = %c1 to %c10 step %c1 iter_args(%arg3 = %7) -> (index, i32) {
+ fir.store %arg3 to %2 : !fir.ref<i32>
+ %9:3 = fir.box_dims %6, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %10 = arith.cmpi sgt, %9#1, %c0 : index
+ %11 = arith.select %10, %9#1, %c0 : index
+ %12 = fir.load %2 : !fir.ref<i32>
+ %13 = fir.convert %12 : (i32) -> i64
+ %14 = fir.undefined index
+ %15 = fir.slice %c1, %9#1, %c1, %13, %14, %14 : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %16 = fir.rebox %6 [%15] : (!fir.box<!fir.array<?x?xf32>>, !fir.slice<2>) -> !fir.box<!fir.array<?xf32>>
+ %17:3 = fir.box_dims %4, %c0 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %18 = fir.slice %c1, %17#1, %c1, %13, %14, %14 : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %19 = fir.rebox %4 [%18] : (!fir.box<!fir.array<?x?xf32>>, !fir.slice<2>) -> !fir.box<!fir.array<?xf32>>
+ fir.do_loop %arg4 = %c1 to %11 step %c1 unordered {
+ %23 = fir.array_coor %16 %arg4 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %24 = fir.load %23 : !fir.ref<f32>
+ %25 = arith.addf %24, %cst fastmath<fast> : f32
+ %26 = fir.array_coor %19 %arg4 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ fir.store %25 to %26 : !fir.ref<f32>
+ }
+ %20 = arith.addi %arg2, %c1 overflow<nsw> : index
+ %21 = fir.load %2 : !fir.ref<i32>
+ %22 = arith.addi %21, %7 overflow<nsw> : i32
+ fir.result %20, %22 : index, i32
+ }
+ fir.store %8#1 to %2 : !fir.ref<i32>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest_step1_slice(
+// CHECK: fir.do_loop
+// CHECK: fir.if
+// CHECK: fir.do_loop
+// CHECK: } else {
+// CHECK: fir.do_loop
+
+// Test that the loop is versioned with logical arrays:
+//subroutine test_logical_slice(x, y)
+// logical :: x(:,:), y(:,:)
+// do i=1,10
+// x(:,i) = y(:,i) .or. y(i,:)
+// end do
+//end subroutine
+func.func @_QPtest_logical_slice(%arg0: !fir.box<!fir.array<?x?x!fir.logical<4>>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?x?x!fir.logical<4>>> {fir.bindc_name = "y"}) {
+ %c10 = arith.constant 10 : index
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_logical_sliceEi"}
+ %2 = fir.declare %1 {uniq_name = "_QFtest_logical_sliceEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest_logical_sliceEx"} : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, !fir.dscope) -> !fir.box<!fir.array<?x?x!fir.logical<4>>>
+ %4 = fir.rebox %3 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.box<!fir.array<?x?x!fir.logical<4>>>
+ %5 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_logical_sliceEy"} : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, !fir.dscope) -> !fir.box<!fir.array<?x?x!fir.logical<4>>>
+ %6 = fir.rebox %5 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.box<!fir.array<?x?x!fir.logical<4>>>
+ %7 = fir.convert %c1 : (index) -> i32
+ %8:2 = fir.do_loop %arg2 = %c1 to %c10 step %c1 iter_args(%arg3 = %7) -> (index, i32) {
+ fir.store %arg3 to %2 : !fir.ref<i32>
+ %9:3 = fir.box_dims %6, %c0 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+ %10 = arith.cmpi sgt, %9#1, %c0 : index
+ %11 = arith.select %10, %9#1, %c0 : index
+ %12 = fir.load %2 : !fir.ref<i32>
+ %13 = fir.convert %12 : (i32) -> i64
+ %14 = fir.undefined index
+ %15 = fir.slice %c1, %9#1, %c1, %13, %14, %14 : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %16 = fir.rebox %6 [%15] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, !fir.slice<2>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
+ %17:3 = fir.box_dims %6, %c1 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+ %18 = fir.slice %13, %14, %14, %c1, %17#1, %c1 : (i64, index, index, index, index, index) -> !fir.slice<2>
+ %19 = fir.rebox %6 [%18] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, !fir.slice<2>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
+ %20:3 = fir.box_dims %4, %c0 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+ %21 = fir.slice %c1, %20#1, %c1, %13, %14, %14 : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %22 = fir.rebox %4 [%21] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, !fir.slice<2>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
+ fir.do_loop %arg4 = %c1 to %11 step %c1 unordered {
+ %26 = fir.array_coor %16 %arg4 : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+ %27 = fir.array_coor %19 %arg4 : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+ %28 = fir.load %26 : !fir.ref<!fir.logical<4>>
+ %29 = fir.load %27 : !fir.ref<!fir.logical<4>>
+ %30 = fir.convert %28 : (!fir.logical<4>) -> i1
+ %31 = fir.convert %29 : (!fir.logical<4>) -> i1
+ %32 = arith.ori %30, %31 : i1
+ %33 = fir.convert %32 : (i1) -> !fir.logical<4>
+ %34 = fir.array_coor %22 %arg4 : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+ fir.store %33 to %34 : !fir.ref<!fir.logical<4>>
+ }
+ %23 = arith.addi %arg2, %c1 overflow<nsw> : index
+ %24 = fir.load %2 : !fir.ref<i32>
+ %25 = arith.addi %24, %7 overflow<nsw> : i32
+ fir.result %23, %25 : index, i32
+ }
+ fir.store %8#1 to %2 : !fir.ref<i32>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest_logical_slice(
+// CHECK: fir.do_loop
+// CHECK: fir.if
+// CHECK: fir.do_loop
+// CHECK: } else {
+// CHECK: fir.do_loop
+
+// Test that the loop is versioned when most probably
+// contiguous slices have a known shape:
+//subroutine test_known_shape_slice(x, y)
+// integer :: x(:,:), y(:,:)
+// do i=1,10
+// x(1:10,i) = y(1:10,i) + 1
+// end do
+//end subroutine
+func.func @_QPtest_known_shape_slice(%arg0: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "y"}) {
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_known_shape_sliceEi"}
+ %2 = fir.declare %1 {uniq_name = "_QFtest_known_shape_sliceEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest_known_shape_sliceEx"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+ %4 = fir.rebox %3 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+ %5 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_known_shape_sliceEy"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
+ %6 = fir.rebox %5 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
+ %7 = fir.convert %c1 : (index) -> i32
+ %8:2 = fir.do_loop %arg2 = %c1 to %c10 step %c1 iter_args(%arg3 = %7) -> (index, i32) {
+ fir.store %arg3 to %2 : !fir.ref<i32>
+ %9 = fir.load %2 : !fir.ref<i32>
+ %10 = fir.convert %9 : (i32) -> i64
+ %11 = fir.undefined index
+ %12 = fir.slice %c1, %c10, %c1, %10, %11, %11 : (index, index, index, i64, index, index) -> !fir.slice<2>
+ %13 = fir.rebox %6 [%12] : (!fir.box<!fir.array<?x?xi32>>, !fir.slice<2>) -> !fir.box<!fir.array<10xi32>>
+ %14 = fir.rebox %4 [%12] : (!fir.box<!fir.array<?x?xi32>>, !fir.slice<2>) -> !fir.box<!fir.array<10xi32>>
+ fir.do_loop %arg4 = %c1 to %c10 step %c1 unordered {
+ %18 = fir.array_coor %13 %arg4 : (!fir.box<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ %19 = fir.load %18 : !fir.ref<i32>
+ %20 = arith.addi %19, %c1_i32 : i32
+ %21 = fir.array_coor %14 %arg4 : (!fir.box<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+ fir.store %20 to %21 : !fir.ref<i32>
+ }
+ %15 = arith.addi %arg2, %c1 overflow<nsw> : index
+ %16 = fir.load %2 : !fir.ref<i32>
+ %17 = arith.addi %16, %7 overflow<nsw> : i32
+ fir.result %15, %17 : index, i32
+ }
+ fir.store %8#1 to %2 : !fir.ref<i32>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest_known_shape_slice(
+// CHECK: fir.do_loop
+// CHECK: fir.if
+// CHECK: fir.do_loop
+// CHECK: } else {
+// CHECK: fir.do_loop
+
+// Test that the loop is not versioned for most probably
+// non-contiguous slices:
+//subroutine test_maybe_noncontig_slice(x, y)
+// real :: x(:,:), y(:,:)
+// do i=1,10
+// x(i,:) = y(i,:) + 1.0
+// end do
+//end subroutine
+func.func @_QPtest_maybe_noncontig_slice(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) {
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_maybe_noncontig_sliceEi"}
+ %2 = fir.declare %1 {uniq_name = "_QFtest_maybe_noncontig_sliceEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtest_maybe_noncontig_sliceEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+ %4 = fir.rebox %3 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+ %5 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtest_maybe_noncontig_sliceEy"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xf32>>
+ %6 = fir.rebox %5 : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>>
+ %7 = fir.convert %c1 : (index) -> i32
+ %8:2 = fir.do_loop %arg2 = %c1 to %c10 step %c1 iter_args(%arg3 = %7) -> (index, i32) {
+ fir.store %arg3 to %2 : !fir.ref<i32>
+ %9 = fir.load %2 : !fir.ref<i32>
+ %10 = fir.convert %9 : (i32) -> i64
+ %11:3 = fir.box_dims %6, %c1 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %12 = arith.cmpi sgt, %11#1, %c0 : index
+ %13 = arith.select %12, %11#1, %c0 : index
+ %14 = fir.undefined index
+ %15 = fir.slice %10, %14, %14, %c1, %11#1, %c1 : (i64, index, index, index, index, index) -> !fir.slice<2>
+ %16 = fir.rebox %6 [%15] : (!fir.box<!fir.array<?x?xf32>>, !fir.slice<2>) -> !fir.box<!fir.array<?xf32>>
+ %17:3 = fir.box_dims %4, %c1 : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+ %18 = fir.slice %10, %14, %14, %c1, %17#1, %c1 : (i64, index, index, index, index, index) -> !fir.slice<2>
+ %19 = fir.rebox %4 [%18] : (!fir.box<!fir.array<?x?xf32>>, !fir.slice<2>) -> !fir.box<!fir.array<?xf32>>
+ fir.do_loop %arg4 = %c1 to %13 step %c1 unordered {
+ %23 = fir.array_coor %16 %arg4 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ %24 = fir.load %23 : !fir.ref<f32>
+ %25 = arith.addf %24, %cst fastmath<fast> : f32
+ %26 = fir.array_coor %19 %arg4 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+ fir.store %25 to %26 : !fir.ref<f32>
+ }
+ %20 = arith.addi %arg2, %c1 overflow<nsw> : index
+ %21 = fir.load %2 : !fir.ref<i32>
+ %22 = arith.addi %21, %7 overflow<nsw> : i32
+ fir.result %20, %22 : index, i32
+ }
+ fir.store %8#1 to %2 : !fir.ref<i32>
+ return
+}
+// CHECK-LABEL: func.func @_QPtest_maybe_noncontig_slice(
+// CHECK-NOT: fir.if
+
+// Regression test for facerec's GraphSimFct:
+//real function test_graphsimfct(a1, a2)
+// integer :: i
+// real, intent(in) :: a1(:,:,:)
+// real, intent(in) :: a2(:,:,:,:)
+// graphsimfct = 0.0
+// do i=1,10
+// test_graphsimfct = test_graphsimfct + SUM(a1(:,:,i) * a2(:,:,i,i))
+// end do
+//end function
+func.func @_QPtest_graphsimfct(%arg0: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "a1"}, %arg1: !fir.box<!fir.array<?x?x?x?xf32>> {fir.bindc_name = "a2"}) -> f32 {
+ %c10 = arith.constant 10 : index
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_graphsimfctEa1"} : (!fir.box<!fir.array<?x?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?x?xf32>>
+ %2 = fir.rebox %1 : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+ %3 = fir.declare %arg1 dummy_scope %0 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_graphsimfctEa2"} : (!fir.box<!fir.array<?x?x?x?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?x?x?x?xf32>>
+ %4 = fir.rebox %3 : (!fir.box<!fir.array<?x?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?x?xf32>>
+ %5 = fir.alloca f32 {bindc_name = "graphsimfct", uniq_name = "_QFtest_graphsimfctEgraphsimfct"}
+ %6 = fir.declare %5 {uniq_name = "_QFtest_graphsimfctEgraphsimfct"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ %7 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_graphsimfctEi"}
+ %8 = fir.declare %7 {uniq_name = "_QFtest_graphsimfctEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %9 = fir.alloca f32 {bindc_name = "test_graphsimfct", uniq_name = "_QFtest_graphsimfctEtest_graphsimfct"}
+ %10 = fir.declare %9 {uniq_name = "_QFtest_graphsimfctEtest_graphsimfct"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ fir.store %cst to %6 : !fir.ref<f32>
+ %11 = fir.convert %c1 : (index) -> i32
+ %12:2 = fir.do_loop %arg2 = %c1 to %c10 step %c1 iter_args(%arg3 = %11) -> (index, i32) {
+ fir.store %arg3 to %8 : !fir.ref<i32>
+ %14 = fir.load %10 : !fir.ref<f32>
+ %15:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+ %16:3 = fir.box_dims %2, %c1 : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+ %17 = arith.cmpi sgt, %15#1, %c0 : index
+ %18 = arith.select %17, %15#1, %c0 : index
+ %19 = arith.cmpi sgt, %16#1, %c0 : index
+ %20 = arith.select %19, %16#1, %c0 : index
+ %21 = fir.load %8 : !fir.ref<i32>
+ %22 = fir.convert %21 : (i32) -> i64
+ %23 = fir.undefined index
+ %24 = fir.slice %c1, %15#1, %c1, %c1, %16#1, %c1, %22, %23, %23 : (index, index, index, index, index, index, i64, index, index) -> !fir.slice<3>
+ %25 = fir.rebox %2 [%24] : (!fir.box<!fir.array<?x?x?xf32>>, !fir.slice<3>) -> !fir.box<!fir.array<?x?xf32>>
+ %26:3 = fir.box_dims %4, %c0 : (!fir.box<!fir.array<?x?x?x?xf32>>, index) -> (index, index, index)
+ %27:3 = fir.box_dims %4, %c1 : (!fir.box<!fir.array<?x?x?x?xf32>>, index) -> (index, index, index)
+ %28 = fir.slice %c1, %26#1, %c1, %c1, %27#1, %c1, %22, %23, %23, %22, %23, %23 : (index, index, index, index, index, index, i64, index, index, i64, index, index) -> !fir.slice<4>
+ %29 = fir.rebox %4 [%28] : (!fir.box<!fir.array<?x?x?x?xf32>>, !fir.slice<4>) -> !fir.box<!fir.array<?x?xf32>>
+ %30 = fir.do_loop %arg4 = %c1 to %20 step %c1 unordered iter_args(%arg5 = %cst) -> (f32) {
+ %35 = fir.do_loop %arg6 = %c1 to %18 step %c1 unordered iter_args(%arg7 = %arg5) -> (f32) {
+ %36 = fir.array_coor %25 %arg6, %arg4 : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+ %37 = fir.array_coor %29 %arg6, %arg4 : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+ %38 = fir.load %36 : !fir.ref<f32>
+ %39 = fir.load %37 : !fir.ref<f32>
+ %40 = arith.mulf %38, %39 fastmath<fast> : f32
+ %41 = arith.addf %arg7, %40 fastmath<fast> : f32
+ fir.result %41 : f32
+ }
+ fir.result %35 : f32
+ }
+ %31 = arith.addf %14, %30 fastmath<fast> : f32
+ fir.store %31 to %10 : !fir.ref<f32>
+ %32 = arith.addi %arg2, %c1 overflow<nsw> : index
+ %33 = fir.load %8 : !fir.ref<i32>
+ %34 = arith.addi %33, %11 overflow<nsw> : i32
+ fir.result %32, %34 : index, i32
+ }
+ fir.store %12#1 to %8 : !fir.ref<i32>
+ %13 = fir.load %10 : !fir.ref<f32>
+ return %13 : f32
+}
+// CHECK-LABEL: func.func @_QPtest_graphsimfct(
+// CHECK: fir.do_loop
+// CHECK: fir.do_loop
+// CHECK: fir.if
+// CHECK: fir.do_loop
+// CHECK: } else {
+// CHECK: fir.do_loop
+
} // End module
More information about the flang-commits
mailing list