[flang-commits] [flang] 1d5e7a4 - [flang] Support DOT_PRODUCT in late inlining.

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Wed Aug 10 16:33:00 PDT 2022


Author: Slava Zakharin
Date: 2022-08-10T16:30:35-07:00
New Revision: 1d5e7a498f7bfb3fb15f1a6e7a78c01efa351c4b

URL: https://github.com/llvm/llvm-project/commit/1d5e7a498f7bfb3fb15f1a6e7a78c01efa351c4b
DIFF: https://github.com/llvm/llvm-project/commit/1d5e7a498f7bfb3fb15f1a6e7a78c01efa351c4b.diff

LOG: [flang] Support DOT_PRODUCT in late inlining.

This change inlines DOT_PRODUCT calls for real and integer types.

Differential Revision: https://reviews.llvm.org/D131538

Added: 
    

Modified: 
    flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
    flang/test/Transforms/simplifyintrinsics.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index ff8f4cff18ec6..4f5f7ad4d571b 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -35,6 +35,9 @@
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/RegionUtils.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "flang-simplify-intrinsics"
 
 namespace {
 
@@ -56,6 +59,7 @@ class SimplifyIntrinsicsPass
                                          FunctionTypeGeneratorTy typeGenerator,
                                          FunctionBodyGeneratorTy bodyGenerator);
   void runOnOperation() override;
+  void getDependentDialects(mlir::DialectRegistry &registry) const override;
 };
 
 } // namespace
@@ -142,6 +146,102 @@ static void genFortranASumBody(fir::FirOpBuilder &builder,
   builder.create<mlir::func::ReturnOp>(loc, resultVal);
 }
 
+/// Generate the function type of the simplified FortranADotProduct version:
+/// two !fir.box<none> arguments and a single \p elementType result.
+static mlir::FunctionType genFortranADotType(fir::FirOpBuilder &builder,
+                                             const mlir::Type &elementType) {
+  mlir::Type boxType = fir::BoxType::get(builder.getNoneType());
+  return mlir::FunctionType::get(builder.getContext(), {boxType, boxType},
+                                 {elementType});
+}
+
+/// Generate the body of the simplified FortranADotProduct with the signature
+/// given by \p funcOp, whose result type must be a floating-point or integer
+/// type. \p funcOp is expected to be empty on entry. The caller is responsible
+/// for saving/restoring the original insertion point of \p builder.
+static void genFortranADotBody(fir::FirOpBuilder &builder,
+                               mlir::func::FuncOp &funcOp) {
+  // function FortranADotProduct<T>_simplified(arr1, arr2)
+  //   T, dimension(:) :: arr1, arr2
+  //   T product = 0
+  //   integer iter
+  //   do iter = 0, extent(arr1) - 1
+  //     product = product + arr1[iter] * arr2[iter]
+  //   end do
+  //   FortranADotProduct<T>_simplified = product
+  // end function FortranADotProduct<T>_simplified
+  auto loc = mlir::UnknownLoc::get(builder.getContext());
+  mlir::Type elementType = funcOp.getResultTypes()[0];
+  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
+
+  mlir::IndexType idxTy = builder.getIndexType();
+
+  mlir::Value zero = elementType.isa<mlir::FloatType>()
+                         ? builder.createRealConstant(loc, elementType, 0.0)
+                         : builder.createIntegerConstant(loc, elementType, 0);
+
+  mlir::Block::BlockArgListType args = funcOp.front().getArguments();
+  mlir::Value arg1 = args[0];
+  mlir::Value arg2 = args[1];
+
+  mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
+
+  fir::SequenceType::Shape flatShape = {fir::SequenceType::getUnknownExtent()};
+  mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
+  mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+  mlir::Value array1 = builder.create<fir::ConvertOp>(loc, boxArrTy, arg1);
+  mlir::Value array2 = builder.create<fir::ConvertOp>(loc, boxArrTy, arg2);
+  // This version takes the loop trip count from the first argument.
+  // If the first argument's box has unknown (at compilation time)
+  // extent, then it may be better to take the extent from the second
+  // argument - so that after inlining the loop may be better optimized, e.g.
+  // fully unrolled. This requires generating two versions of the simplified
+  // function and some analysis at the call site to choose which version
+  // is more profitable to call.
+  // Note that we can assume that both arguments have the same extent.
+  auto dims =
+      builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array1, zeroIdx);
+  mlir::Value len = dims.getResult(1);
+  mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+  mlir::Value step = one;
+
+  // Zero-based (C-style) indexing, so the last index visited is len-1.
+  mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
+  auto loop = builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step,
+                                            /*unordered=*/false,
+                                            /*finalCountValue=*/false, zero);
+  mlir::Value sumVal = loop.getRegionIterArgs()[0];
+
+  // Loop body: accumulate arr1[index] * arr2[index] into the running sum.
+  mlir::OpBuilder::InsertPoint loopEndPt = builder.saveInsertionPoint();
+  builder.setInsertionPointToStart(loop.getBody());
+
+  mlir::Type eleRefTy = builder.getRefType(elementType);
+  mlir::Value index = loop.getInductionVar();
+  mlir::Value addr1 =
+      builder.create<fir::CoordinateOp>(loc, eleRefTy, array1, index);
+  mlir::Value elem1 = builder.create<fir::LoadOp>(loc, addr1);
+  mlir::Value addr2 =
+      builder.create<fir::CoordinateOp>(loc, eleRefTy, array2, index);
+  mlir::Value elem2 = builder.create<fir::LoadOp>(loc, addr2);
+
+  if (elementType.isa<mlir::FloatType>())
+    sumVal = builder.create<mlir::arith::AddFOp>(
+        loc, builder.create<mlir::arith::MulFOp>(loc, elem1, elem2), sumVal);
+  else if (elementType.isa<mlir::IntegerType>())
+    sumVal = builder.create<mlir::arith::AddIOp>(
+        loc, builder.create<mlir::arith::MulIOp>(loc, elem1, elem2), sumVal);
+  else
+    llvm_unreachable("unsupported type");
+
+  builder.create<fir::ResultOp>(loc, sumVal);
+  // End of loop body; continue emitting code after the loop.
+  builder.restoreInsertionPoint(loopEndPt);
+
+  mlir::Value resultVal = loop.getResult(0);
+  builder.create<mlir::func::ReturnOp>(loc, resultVal);
+}
+
 mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction(
     fir::FirOpBuilder &builder, const mlir::StringRef &baseName,
     FunctionTypeGeneratorTy typeGenerator,
@@ -218,6 +318,7 @@ static unsigned getDimCount(mlir::Value val) {
 }
 
 void SimplifyIntrinsicsPass::runOnOperation() {
+  LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
   mlir::ModuleOp module = getOperation();
   fir::KindMapping kindMap = fir::getKindMapping(module);
   module.walk([&](mlir::Operation *op) {
@@ -263,12 +364,48 @@ void SimplifyIntrinsicsPass::runOnOperation() {
             call->dropAllReferences();
             call->erase();
           }
+
+          return;
+        }
+        if (funcName.startswith("_FortranADotProduct")) {
+          LLVM_DEBUG(llvm::dbgs() << "Handling " << funcName << "\n");
+          LLVM_DEBUG(llvm::dbgs() << "Call operation:\n"; op->dump();
+                     llvm::dbgs() << "\n");
+          mlir::Operation::operand_range args = call.getArgs();
+          const mlir::Value &v1 = args[0];
+          const mlir::Value &v2 = args[1];
+          mlir::Location loc = call.getLoc();
+          fir::FirOpBuilder builder(op, kindMap);
+          mlir::Type type = call.getResult(0).getType();
+          if (!type.isa<mlir::FloatType>() && !type.isa<mlir::IntegerType>())
+            return;
+
+          auto typeGenerator = [&type](fir::FirOpBuilder &builder) {
+            return genFortranADotType(builder, type);
+          };
+          mlir::func::FuncOp newFunc = getOrCreateFunction(
+              builder, funcName, typeGenerator, genFortranADotBody);
+          auto newCall = builder.create<fir::CallOp>(loc, newFunc,
+                                                     mlir::ValueRange{v1, v2});
+          call->replaceAllUsesWith(newCall.getResults());
+          call->dropAllReferences();
+          call->erase();
+
+          LLVM_DEBUG(llvm::dbgs() << "Replaced with:\n"; newCall.dump();
+                     llvm::dbgs() << "\n");
+          return;
         }
       }
     }
   });
+  LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
 }
 
+void SimplifyIntrinsicsPass::getDependentDialects(
+    mlir::DialectRegistry &registry) const {
+  // Creating an LLVM::LinkageAttr requires the LLVM dialect to be loaded.
+  registry.insert<mlir::LLVM::LLVMDialect>();
+}
 std::unique_ptr<mlir::Pass> fir::createSimplifyIntrinsicsPass() {
   return std::make_unique<SimplifyIntrinsicsPass>();
 }

diff  --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 69f2cb8ae658c..78df5d57f91d9 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -317,3 +317,273 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
 // CHECK:           }
 // CHECK:           return %{{.*}} : i32
 // CHECK:         }
+
+// -----
+
+func.func @dot_f32(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "b"}) -> f32 {
+  %0 = fir.alloca f32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductReal4(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f32
+  fir.store %5 to %0 : !fir.ref<f32>
+  %6 = fir.load %0 : !fir.ref<f32>
+  return %6 : f32
+}
+func.func private @_FortranADotProductReal4(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f32 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// CHECK-LABEL:   func.func @dot_f32(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "b"}) -> f32 {
+// CHECK:           %[[RESLOC:.*]] = fir.alloca f32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+// CHECK:           %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+// CHECK:           %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<!fir.array<?xf32>>) -> !fir.box<none>
+// CHECK:           %[[RES:.*]] = fir.call @_FortranADotProductReal4_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box<none>, !fir.box<none>) -> f32
+// CHECK:           fir.store %[[RES]] to %[[RESLOC]] : !fir.ref<f32>
+// CHECK:           %[[RET:.*]] = fir.load %[[RESLOC]] : !fir.ref<f32>
+// CHECK:           return %[[RET]] : f32
+// CHECK:         }
+
+// CHECK-LABEL:   func.func private @_FortranADotProductReal4_simplified(
+// CHECK-SAME:                                                           %[[A:.*]]: !fir.box<none>,
+// CHECK-SAME:                                                           %[[B:.*]]: !fir.box<none>) -> f32 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[FZERO:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[IZERO:.*]] = arith.constant 0 : index
+// CHECK:           %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<none>) -> !fir.box<!fir.array<?xf32>>
+// CHECK:           %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<none>) -> !fir.box<!fir.array<?xf32>>
+// CHECK:           %[[DIMS:.*]]:3 = fir.box_dims %[[ACAST]], %[[IZERO]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[IONE:.*]] = arith.constant 1 : index
+// CHECK:           %[[LEN:.*]] = arith.subi %[[DIMS]]#1, %[[IONE]] : index
+// CHECK:           %[[RES:.*]] = fir.do_loop %[[IDX:.*]] = %[[IZERO]] to %[[LEN]] step %[[IONE]] iter_args(%[[SUM:.*]] = %[[FZERO]]) -> (f32) {
+// CHECK:             %[[ALOC:.*]] = fir.coordinate_of %[[ACAST]], %[[IDX]] : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:             %[[AVAL:.*]] = fir.load %[[ALOC]] : !fir.ref<f32>
+// CHECK:             %[[BLOC:.*]] = fir.coordinate_of %[[BCAST]], %[[IDX]] : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:             %[[BVAL:.*]] = fir.load %[[BLOC]] : !fir.ref<f32>
+// CHECK:             %[[MUL:.*]] = arith.mulf %[[AVAL]], %[[BVAL]] : f32
+// CHECK:             %[[NEWSUM:.*]] = arith.addf %[[MUL]], %[[SUM]] : f32
+// CHECK:             fir.result %[[NEWSUM]] : f32
+// CHECK:           }
+// CHECK:           return %[[RES]] : f32
+// CHECK:         }
+
+// -----
+
+func.func @dot_f64(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "b"}) -> f64 {
+  %0 = fir.alloca f64 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf64>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductReal8(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f64
+  fir.store %5 to %0 : !fir.ref<f64>
+  %6 = fir.load %0 : !fir.ref<f64>
+  return %6 : f64
+}
+func.func private @_FortranADotProductReal8(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f64 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all FP types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL:   func.func @dot_f64(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "b"}) -> f64 {
+// CHECK-NOT: call{{.*}}_FortranADotProductReal8(
+
+// -----
+
+func.func @dot_f80(%arg0: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "b"}) -> f80 {
+  %0 = fir.alloca f80 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf80>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf80>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductReal10(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f80
+  fir.store %5 to %0 : !fir.ref<f80>
+  %6 = fir.load %0 : !fir.ref<f80>
+  return %6 : f80
+}
+func.func private @_FortranADotProductReal10(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f80 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all FP types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL:   func.func @dot_f80(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xf80>> {fir.bindc_name = "b"}) -> f80 {
+// CHECK-NOT: call{{.*}}_FortranADotProductReal10(
+
+// -----
+
+func.func @dot_f128(%arg0: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "b"}) -> f128 {
+  %0 = fir.alloca f128 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xf128>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xf128>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductReal16(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f128
+  fir.store %5 to %0 : !fir.ref<f128>
+  %6 = fir.load %0 : !fir.ref<f128>
+  return %6 : f128
+}
+func.func private @_FortranADotProductReal16(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> f128 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all FP types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL:   func.func @dot_f128(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xf128>> {fir.bindc_name = "b"}) -> f128 {
+// CHECK-NOT: call{{.*}}_FortranADotProductReal16(
+
+// -----
+
+func.func @dot_i32(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) -> i32 {
+  %0 = fir.alloca i32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductInteger4(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+  fir.store %5 to %0 : !fir.ref<i32>
+  %6 = fir.load %0 : !fir.ref<i32>
+  return %6 : i32
+}
+func.func private @_FortranADotProductInteger4(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i32 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// CHECK-LABEL:   func.func @dot_i32(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) -> i32 {
+// CHECK:           %[[RESLOC:.*]] = fir.alloca i32 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+// CHECK:           %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:           %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:           %[[RES:.*]] = fir.call @_FortranADotProductInteger4_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box<none>, !fir.box<none>) -> i32
+// CHECK:           fir.store %[[RES]] to %[[RESLOC]] : !fir.ref<i32>
+// CHECK:           %[[RET:.*]] = fir.load %[[RESLOC]] : !fir.ref<i32>
+// CHECK:           return %[[RET]] : i32
+// CHECK:         }
+
+// CHECK-LABEL:   func.func private @_FortranADotProductInteger4_simplified(
+// CHECK-SAME:                                                           %[[A:.*]]: !fir.box<none>,
+// CHECK-SAME:                                                           %[[B:.*]]: !fir.box<none>) -> i32 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[I32ZERO:.*]] = arith.constant 0 : i32
+// CHECK:           %[[IZERO:.*]] = arith.constant 0 : index
+// CHECK:           %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[DIMS:.*]]:3 = fir.box_dims %[[ACAST]], %[[IZERO]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK:           %[[IONE:.*]] = arith.constant 1 : index
+// CHECK:           %[[LEN:.*]] = arith.subi %[[DIMS]]#1, %[[IONE]] : index
+// CHECK:           %[[RES:.*]] = fir.do_loop %[[IDX:.*]] = %[[IZERO]] to %[[LEN]] step %[[IONE]] iter_args(%[[SUM:.*]] = %[[I32ZERO]]) -> (i32) {
+// CHECK:             %[[ALOC:.*]] = fir.coordinate_of %[[ACAST]], %[[IDX]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:             %[[AVAL:.*]] = fir.load %[[ALOC]] : !fir.ref<i32>
+// CHECK:             %[[BLOC:.*]] = fir.coordinate_of %[[BCAST]], %[[IDX]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:             %[[BVAL:.*]] = fir.load %[[BLOC]] : !fir.ref<i32>
+// CHECK:             %[[MUL:.*]] = arith.muli %[[AVAL]], %[[BVAL]] : i32
+// CHECK:             %[[NEWSUM:.*]] = arith.addi %[[MUL]], %[[SUM]] : i32
+// CHECK:             fir.result %[[NEWSUM]] : i32
+// CHECK:           }
+// CHECK:           return %[[RES]] : i32
+// CHECK:         }
+
+// -----
+
+func.func @dot_i8(%arg0: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "b"}) -> i8 {
+  %0 = fir.alloca i8 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi8>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi8>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductInteger1(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i8
+  fir.store %5 to %0 : !fir.ref<i8>
+  %6 = fir.load %0 : !fir.ref<i8>
+  return %6 : i8
+}
+func.func private @_FortranADotProductInteger1(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i8 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all integer types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL:   func.func @dot_i8(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xi8>> {fir.bindc_name = "b"}) -> i8 {
+// CHECK-NOT: call{{.*}}_FortranADotProductInteger1(
+
+// -----
+
+func.func @dot_i16(%arg0: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "b"}) -> i16 {
+  %0 = fir.alloca i16 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi16>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi16>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductInteger2(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i16
+  fir.store %5 to %0 : !fir.ref<i16>
+  %6 = fir.load %0 : !fir.ref<i16>
+  return %6 : i16
+}
+func.func private @_FortranADotProductInteger2(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i16 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all integer types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL:   func.func @dot_i16(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "b"}) -> i16 {
+// CHECK-NOT: call{{.*}}_FortranADotProductInteger2(
+
+// -----
+
+func.func @dot_i64(%arg0: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "b"}) -> i64 {
+  %0 = fir.alloca i64 {bindc_name = "dot", uniq_name = "_QFdotEdot"}
+  %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref<!fir.char<1,10>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?xi64>>) -> !fir.box<none>
+  %3 = fir.convert %arg1 : (!fir.box<!fir.array<?xi64>>) -> !fir.box<none>
+  %4 = fir.convert %1 : (!fir.ref<!fir.char<1,10>>) -> !fir.ref<i8>
+  %5 = fir.call @_FortranADotProductInteger8(%2, %3, %4, %c3_i32) : (!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i64
+  fir.store %5 to %0 : !fir.ref<i64>
+  %6 = fir.load %0 : !fir.ref<i64>
+  return %6 : i64
+}
+func.func private @_FortranADotProductInteger8(!fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> i64 attributes {fir.runtime}
+fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> {
+  %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10>
+  fir.has_value %0 : !fir.char<1,10>
+}
+
+// The same code handles all integer types, so just check that there is no
+// call to runtime:
+// CHECK-LABEL:   func.func @dot_i64(
+// CHECK-SAME:                      %[[A:.*]]: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "a"},
+// CHECK-SAME:                      %[[B:.*]]: !fir.box<!fir.array<?xi64>> {fir.bindc_name = "b"}) -> i64 {
+// CHECK-NOT: call{{.*}}_FortranADotProductInteger8(


        


More information about the flang-commits mailing list