[flang-commits] [flang] 1d5e7a4 - [flang] Support DOT_PRODUCT in late inlining.
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Wed Aug 10 16:33:00 PDT 2022
Author: Slava Zakharin
Date: 2022-08-10T16:30:35-07:00
New Revision: 1d5e7a498f7bfb3fb15f1a6e7a78c01efa351c4b
URL: https://github.com/llvm/llvm-project/commit/1d5e7a498f7bfb3fb15f1a6e7a78c01efa351c4b
DIFF: https://github.com/llvm/llvm-project/commit/1d5e7a498f7bfb3fb15f1a6e7a78c01efa351c4b.diff
LOG: [flang] Support DOT_PRODUCT in late inlining.
This change inlines DOT_PRODUCT calls for real and integer types.
Differential Revision: https://reviews.llvm.org/D131538
Added:
Modified:
flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
flang/test/Transforms/simplifyintrinsics.fir
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index ff8f4cff18ec6..4f5f7ad4d571b 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -35,6 +35,9 @@
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "flang-simplify-intrinsics"
namespace {
@@ -56,6 +59,7 @@ class SimplifyIntrinsicsPass
FunctionTypeGeneratorTy typeGenerator,
FunctionBodyGeneratorTy bodyGenerator);
void runOnOperation() override;
+ void getDependentDialects(mlir::DialectRegistry &registry) const override;
};
} // namespace
@@ -142,6 +146,102 @@ static void genFortranASumBody(fir::FirOpBuilder &builder,
builder.create<mlir::func::ReturnOp>(loc, resultVal);
}
+/// Generate function type for the simplified version of FortranADotProduct
+/// operating on the given \p elementType: (box<none>, box<none>) -> \p elementType.
+static mlir::FunctionType genFortranADotType(fir::FirOpBuilder &builder,
+ const mlir::Type &elementType) {
+ mlir::Type boxType = fir::BoxType::get(builder.getNoneType()); // type-erased box used for both array operands
+ return mlir::FunctionType::get(builder.getContext(), {boxType, boxType},
+ {elementType});
+}
+
+/// Generate function body of the simplified version of FortranADotProduct
+/// with signature provided by \p funcOp. The caller is responsible
+/// for saving/restoring the original insertion point of \p builder.
+/// \p funcOp is expected to be empty on entry to this function.
+static void genFortranADotBody(fir::FirOpBuilder &builder,
+ mlir::func::FuncOp &funcOp) {
+ // function FortranADotProduct<T>_simplified(arr1, arr2)
+ // T, dimension(:) :: arr1, arr2
+ // T product = 0
+ // integer iter
+ // do iter = 0, extent(arr1) - 1
+ // product = product + arr1[iter] * arr2[iter]
+ // end do
+ // FortranADotProduct<T>_simplified = product
+ // end function FortranADotProduct<T>_simplified
+ auto loc = mlir::UnknownLoc::get(builder.getContext());
+ mlir::Type elementType = funcOp.getResultTypes()[0];
+ builder.setInsertionPointToEnd(funcOp.addEntryBlock());
+
+ mlir::IndexType idxTy = builder.getIndexType();
+
+ mlir::Value zero = elementType.isa<mlir::FloatType>()
+ ? builder.createRealConstant(loc, elementType, 0.0)
+ : builder.createIntegerConstant(loc, elementType, 0);
+
+ mlir::Block::BlockArgListType args = funcOp.front().getArguments();
+ mlir::Value arg1 = args[0];
+ mlir::Value arg2 = args[1];
+
+ mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
+
+ fir::SequenceType::Shape flatShape = {fir::SequenceType::getUnknownExtent()};
+ mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
+ mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+ mlir::Value array1 = builder.create<fir::ConvertOp>(loc, boxArrTy, arg1);
+ mlir::Value array2 = builder.create<fir::ConvertOp>(loc, boxArrTy, arg2);
+ // This version takes the loop trip count from the first argument.
+ // If the first argument's box has unknown (at compilation time)
+ // extent, then it may be better to take the extent from the second
+ // argument - so that after inlining the loop may be better optimized, e.g.
+ // fully unrolled. This requires generating two versions of the simplified
+ // function and some analysis at the call site to choose which version
+ // is more profitable to call.
+ // Note that we can assume that both arguments have the same extent.
+ auto dims =
+ builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array1, zeroIdx);
+ mlir::Value len = dims.getResult(1);
+ mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+ mlir::Value step = one;
+
+ // We use C indexing here (iteration starts at 0), so the last index visited, len-1, is the loop's upper bound
+ mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
+ auto loop = builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step,
+ /*unordered=*/false,
+ /*finalCountValue=*/false, zero);
+ mlir::Value sumVal = loop.getRegionIterArgs()[0]; // running sum, seeded with `zero` and yielded by the fir.result below
+
+ // Begin loop code
+ mlir::OpBuilder::InsertPoint loopEndPt = builder.saveInsertionPoint();
+ builder.setInsertionPointToStart(loop.getBody());
+
+ mlir::Type eleRefTy = builder.getRefType(elementType);
+ mlir::Value index = loop.getInductionVar();
+ mlir::Value addr1 =
+ builder.create<fir::CoordinateOp>(loc, eleRefTy, array1, index);
+ mlir::Value elem1 = builder.create<fir::LoadOp>(loc, addr1);
+ mlir::Value addr2 =
+ builder.create<fir::CoordinateOp>(loc, eleRefTy, array2, index);
+ mlir::Value elem2 = builder.create<fir::LoadOp>(loc, addr2);
+
+ if (elementType.isa<mlir::FloatType>())
+ sumVal = builder.create<mlir::arith::AddFOp>(
+ loc, builder.create<mlir::arith::MulFOp>(loc, elem1, elem2), sumVal);
+ else if (elementType.isa<mlir::IntegerType>())
+ sumVal = builder.create<mlir::arith::AddIOp>(
+ loc, builder.create<mlir::arith::MulIOp>(loc, elem1, elem2), sumVal);
+ else
+ llvm_unreachable("unsupported type");
+
+ builder.create<fir::ResultOp>(loc, sumVal);
+ // End of loop.
+ builder.restoreInsertionPoint(loopEndPt);
+
+ mlir::Value resultVal = loop.getResult(0);
+ builder.create<mlir::func::ReturnOp>(loc, resultVal);
+}
+
mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction(
fir::FirOpBuilder &builder, const mlir::StringRef &baseName,
FunctionTypeGeneratorTy typeGenerator,
@@ -218,6 +318,7 @@ static unsigned getDimCount(mlir::Value val) {
}
void SimplifyIntrinsicsPass::runOnOperation() {
+ LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
mlir::ModuleOp module = getOperation();
fir::KindMapping kindMap = fir::getKindMapping(module);
module.walk([&](mlir::Operation *op) {
@@ -263,12 +364,48 @@ void SimplifyIntrinsicsPass::runOnOperation() {
call->dropAllReferences();
call->erase();
}
+
+ return;
+ }
+ if (funcName.startswith("_FortranADotProduct")) {
+ LLVM_DEBUG(llvm::dbgs() << "Handling " << funcName << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "Call operation:\n"; op->dump();
+ llvm::dbgs() << "\n");
+ mlir::Operation::operand_range args = call.getArgs();
+ const mlir::Value &v1 = args[0];
+ const mlir::Value &v2 = args[1];
+ mlir::Location loc = call.getLoc();
+ fir::FirOpBuilder builder(op, kindMap);
+ mlir::Type type = call.getResult(0).getType();
+ if (!type.isa<mlir::FloatType>() && !type.isa<mlir::IntegerType>())
+ return;
+
+ auto typeGenerator = [&type](fir::FirOpBuilder &builder) {
+ return genFortranADotType(builder, type);
+ };
+ mlir::func::FuncOp newFunc = getOrCreateFunction(
+ builder, funcName, typeGenerator, genFortranADotBody);
+ auto newCall = builder.create<fir::CallOp>(loc, newFunc,
+ mlir::ValueRange{v1, v2});
+ call->replaceAllUsesWith(newCall.getResults());
+ call->dropAllReferences();
+ call->erase();
+
+ LLVM_DEBUG(llvm::dbgs() << "Replaced with:\n"; newCall.dump();
+ llvm::dbgs() << "\n");
+ return;
}
}
}
});
+ LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
}
+/// Register the dialects this pass depends on with \p registry.
+/// The only dependency added here is the LLVM dialect.
+void SimplifyIntrinsicsPass::getDependentDialects(
+ mlir::DialectRegistry &registry) const {
+ // LLVM::LinkageAttr creation requires that LLVM dialect is loaded.
+ registry.insert<mlir::LLVM::LLVMDialect>();
+}
/// Create a new SimplifyIntrinsicsPass instance, returned as a generic mlir::Pass.
std::unique_ptr<mlir::Pass> fir::createSimplifyIntrinsicsPass() {
return std::make_unique<SimplifyIntrinsicsPass>();
}
diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 69f2cb8ae658c..78df5d57f91d9 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -317,3 +317,273 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
// CHECK: }
// CHECK: return %{{.*}} : i32
// CHECK: }
+
+// -----
+
+func.func @dot_f32(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "b"}) -> f32 {
+ %0 = fir.alloca f32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductReal4(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f32
+ fir.store %5 to %0 : !fir.ref<f32>
+ %6 = fir.load %0 : !fir.ref<f32>
+ return %6 : f32
+}
+func.func private @_FortranADotProductReal4(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f32 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// CHECK-LABEL: func.func @dot_f32(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "b"}) -> f32 {
+// CHECK: %[[RESLOC:.*]] = fir.alloca f32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+// CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+// CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+// CHECK: %[[RES:.*]] = fir.call @_FortranADotProductReal4_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box<none>, !fir.box<none>) -> f32
+// CHECK: fir.store %[[RES]] to %[[RESLOC]] : !fir.ref<f32>
+// CHECK: %[[RET:.*]] = fir.load %[[RESLOC]] : !fir.ref<f32>
+// CHECK: return %[[RET]] : f32
+// CHECK: }
+
+// CHECK-LABEL: func.func private @_FortranADotProductReal4_simplified(
+// CHECK-SAME: %[[A:.*]]: !fir.box<none>,
+// CHECK-SAME: %[[B:.*]]: !fir.box<none>) -> f32 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK: %[[FZERO:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[IZERO:.*]] = arith.constant 0 : index
+// CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<none>) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<none>) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ACAST]], %[[IZERO]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK: %[[IONE:.*]] = arith.constant 1 : index
+// CHECK: %[[LEN:.*]] = arith.subi %[[DIMS]]#1, %[[IONE]] : index
+// CHECK: %[[RES:.*]] = fir.do_loop %[[IDX:.*]] = %[[IZERO]] to %[[LEN]] step %[[IONE]] iter_args(%[[SUM:.*]] = %[[FZERO]]) -> (f32) {
+// CHECK: %[[ALOC:.*]] = fir.coordinate_of %[[ACAST]], %[[IDX]] : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[AVAL:.*]] = fir.load %[[ALOC]] : !fir.ref<f32>
+// CHECK: %[[BLOC:.*]] = fir.coordinate_of %[[BCAST]], %[[IDX]] : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK: %[[BVAL:.*]] = fir.load %[[BLOC]] : !fir.ref<f32>
+// CHECK: %[[MUL:.*]] = arith.mulf %[[AVAL]], %[[BVAL]] : f32
+// CHECK: %[[NEWSUM:.*]] = arith.addf %[[MUL]], %[[SUM]] : f32
+// CHECK: fir.result %[[NEWSUM]] : f32
+// CHECK: }
+// CHECK: return %[[RES]] : f32
+// CHECK: }
+
+// -----
+
+func.func @dot_f64(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "b"}) -> f64 {
+ %0 = fir.alloca f64 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductReal8(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f64
+ fir.store %5 to %0 : !fir.ref<f64>
+ %6 = fir.load %0 : !fir.ref<f64>
+ return %6 : f64
+}
+func.func private @_FortranADotProductReal8(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f64 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all FP types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL: func.func @dot_f64(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "b"}) -> f64 {
+// CHECK-NOT: call{{.*}}_FortranADotProductReal8(
+
+// -----
+
+func.func @dot_f80(%arg0: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "b"}) -> f80 {
+ %0 = fir.alloca f80 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf80>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf80>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductReal10(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f80
+ fir.store %5 to %0 : !fir.ref<f80>
+ %6 = fir.load %0 : !fir.ref<f80>
+ return %6 : f80
+}
+func.func private @_FortranADotProductReal10(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f80 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all FP types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL: func.func @dot_f80(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "b"}) -> f80 {
+// CHECK-NOT: call{{.*}}_FortranADotProductReal10(
+
+// -----
+
+func.func @dot_f128(%arg0: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "b"}) -> f128 {
+ %0 = fir.alloca f128 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf128>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf128>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductReal16(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f128
+ fir.store %5 to %0 : !fir.ref<f128>
+ %6 = fir.load %0 : !fir.ref<f128>
+ return %6 : f128
+}
+func.func private @_FortranADotProductReal16(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f128 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all FP types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL: func.func @dot_f128(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "b"}) -> f128 {
+// CHECK-NOT: call{{.*}}_FortranADotProductReal16(
+
+// -----
+
+func.func @dot_i32(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) -> i32 {
+ %0 = fir.alloca i32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductInteger4(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+ fir.store %5 to %0 : !fir.ref<i32>
+ %6 = fir.load %0 : !fir.ref<i32>
+ return %6 : i32
+}
+func.func private @_FortranADotProductInteger4(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i32 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// CHECK-LABEL: func.func @dot_i32(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) -> i32 {
+// CHECK: %[[RESLOC:.*]] = fir.alloca i32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+// CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK: %[[RES:.*]] = fir.call @_FortranADotProductInteger4_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box<none>, !fir.box<none>) -> i32
+// CHECK: fir.store %[[RES]] to %[[RESLOC]] : !fir.ref<i32>
+// CHECK: %[[RET:.*]] = fir.load %[[RESLOC]] : !fir.ref<i32>
+// CHECK: return %[[RET]] : i32
+// CHECK: }
+
+// CHECK-LABEL: func.func private @_FortranADotProductInteger4_simplified(
+// CHECK-SAME: %[[A:.*]]: !fir.box<none>,
+// CHECK-SAME: %[[B:.*]]: !fir.box<none>) -> i32 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK: %[[I32ZERO:.*]] = arith.constant 0 : i32
+// CHECK: %[[IZERO:.*]] = arith.constant 0 : index
+// CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ACAST]], %[[IZERO]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK: %[[IONE:.*]] = arith.constant 1 : index
+// CHECK: %[[LEN:.*]] = arith.subi %[[DIMS]]#1, %[[IONE]] : index
+// CHECK: %[[RES:.*]] = fir.do_loop %[[IDX:.*]] = %[[IZERO]] to %[[LEN]] step %[[IONE]] iter_args(%[[SUM:.*]] = %[[I32ZERO]]) -> (i32) {
+// CHECK: %[[ALOC:.*]] = fir.coordinate_of %[[ACAST]], %[[IDX]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[AVAL:.*]] = fir.load %[[ALOC]] : !fir.ref<i32>
+// CHECK: %[[BLOC:.*]] = fir.coordinate_of %[[BCAST]], %[[IDX]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[BVAL:.*]] = fir.load %[[BLOC]] : !fir.ref<i32>
+// CHECK: %[[MUL:.*]] = arith.muli %[[AVAL]], %[[BVAL]] : i32
+// CHECK: %[[NEWSUM:.*]] = arith.addi %[[MUL]], %[[SUM]] : i32
+// CHECK: fir.result %[[NEWSUM]] : i32
+// CHECK: }
+// CHECK: return %[[RES]] : i32
+// CHECK: }
+
+// -----
+
+func.func @dot_i8(%arg0: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "b"}) -> i8 {
+ %0 = fir.alloca i8 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi8>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi8>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductInteger1(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i8
+ fir.store %5 to %0 : !fir.ref<i8>
+ %6 = fir.load %0 : !fir.ref<i8>
+ return %6 : i8
+}
+func.func private @_FortranADotProductInteger1(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i8 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all integer types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL: func.func @dot_i8(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "b"}) -> i8 {
+// CHECK-NOT: call{{.*}}_FortranADotProductInteger1(
+
+// -----
+
+func.func @dot_i16(%arg0: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "b"}) -> i16 {
+ %0 = fir.alloca i16 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi16>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi16>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductInteger2(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i16
+ fir.store %5 to %0 : !fir.ref<i16>
+ %6 = fir.load %0 : !fir.ref<i16>
+ return %6 : i16
+}
+func.func private @_FortranADotProductInteger2(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i16 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all integer types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL: func.func @dot_i16(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "b"}) -> i16 {
+// CHECK-NOT: call{{.*}}_FortranADotProductInteger2(
+
+// -----
+
+func.func @dot_i64(%arg0: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "b"}) -> i64 {
+ %0 = fir.alloca i64 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+ %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+ %c3_i32 = arith.constant 3 : i32
+ %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi64>>) -> !fir.box<none>
+ %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi64>>) -> !fir.box<none>
+ %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+ %5 = fir.call @_FortranADotProductInteger8(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i64
+ fir.store %5 to %0 : !fir.ref<i64>
+ %6 = fir.load %0 : !fir.ref<i64>
+ return %6 : i64
+}
+func.func private @_FortranADotProductInteger8(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i64 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+ %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+ fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all integer types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL: func.func @dot_i64(
+// CHECK-SAME: %[[A:.*]]: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "a"},
+// CHECK-SAME: %[[B:.*]]: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "b"}) -> i64 {
+// CHECK-NOT: call{{.*}}_FortranADotProductInteger8(
More information about the flang-commits
mailing list