[flang-commits] [flang] [flang] Lower MATMUL to type specific runtime calls. (PR #97547)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Wed Jul 3 02:40:33 PDT 2024


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/97547

Lower MATMUL to the new runtime entries added in #97406.


>From 450f6705f7a6aac3618b4acea2eed2ff55cced37 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Wed, 3 Jul 2024 01:11:40 -0700
Subject: [PATCH] [flang] Lower MATMUL to type specific runtime calls.

Lower MATMUL to the new runtime entries added in #97406.
---
 flang/include/flang/Optimizer/Support/Utils.h |  77 +++++++++-
 .../flang/Runtime/matmul-instances.inc        |  23 +--
 .../include/flang/Runtime/matmul-transpose.h  |   2 +
 flang/include/flang/Runtime/matmul.h          |   2 +
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  15 +-
 .../Builder/Runtime/Transformational.cpp      |  96 ++++++++++++-
 flang/runtime/matmul-transpose.cpp            |  53 +------
 flang/runtime/matmul.cpp                      |  53 +------
 flang/test/HLFIR/matmul-lowering.fir          |   6 +-
 flang/test/HLFIR/mul_transpose.f90            |   6 +-
 flang/test/Lower/Intrinsics/matmul.f90        |   4 +-
 .../Builder/Runtime/RuntimeCallTestBase.h     |   9 ++
 .../Builder/Runtime/TransformationalTest.cpp  |  42 ++++--
 flang/unittests/Runtime/Matmul.cpp            | 119 ----------------
 flang/unittests/Runtime/MatmulTranspose.cpp   | 131 ------------------
 15 files changed, 246 insertions(+), 392 deletions(-)

diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index ae95a26be1d86..2ffb48335686c 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -84,9 +84,10 @@ inline std::string mlirTypeToString(mlir::Type type) {
   return result;
 }
 
-inline std::string numericMlirTypeToFortran(fir::FirOpBuilder &builder,
-                                            mlir::Type type, mlir::Location loc,
-                                            const llvm::Twine &name) {
+inline std::string mlirTypeToIntrinsicFortran(fir::FirOpBuilder &builder,
+                                              mlir::Type type,
+                                              mlir::Location loc,
+                                              const llvm::Twine &name) {
   if (type.isF16())
     return "REAL(KIND=2)";
   else if (type.isBF16())
@@ -123,6 +124,14 @@ inline std::string numericMlirTypeToFortran(fir::FirOpBuilder &builder,
     return "COMPLEX(KIND=10)";
   else if (type == fir::ComplexType::get(builder.getContext(), 16))
     return "COMPLEX(KIND=16)";
+  else if (type == fir::LogicalType::get(builder.getContext(), 1))
+    return "LOGICAL(KIND=1)";
+  else if (type == fir::LogicalType::get(builder.getContext(), 2))
+    return "LOGICAL(KIND=2)";
+  else if (type == fir::LogicalType::get(builder.getContext(), 4))
+    return "LOGICAL(KIND=4)";
+  else if (type == fir::LogicalType::get(builder.getContext(), 8))
+    return "LOGICAL(KIND=8)";
   else
     fir::emitFatalError(loc, "unsupported type in " + name + ": " +
                                  fir::mlirTypeToString(type));
@@ -133,10 +142,70 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
                               const llvm::Twine &intrinsicName) {
   TODO(loc,
        "intrinsic: " +
-           fir::numericMlirTypeToFortran(builder, type, loc, intrinsicName) +
+           fir::mlirTypeToIntrinsicFortran(builder, type, loc, intrinsicName) +
            " in " + intrinsicName);
 }
 
+/// Emit a TODO for \p intrinsicName naming \p type1, then \p type2.
+inline void intrinsicTypeTODO2(fir::FirOpBuilder &builder, mlir::Type type1,
+                               mlir::Type type2, mlir::Location loc,
+                               const llvm::Twine &intrinsicName) {
+  auto typeName = [&](mlir::Type type) {
+    return fir::mlirTypeToIntrinsicFortran(builder, type, loc, intrinsicName);
+  };
+  TODO(loc, "intrinsic: {" + typeName(type1) + ", " + typeName(type2) +
+                "} in " + intrinsicName);
+}
+
+/// Map an MLIR type to its Fortran {category, kind}; fatal error otherwise.
+inline std::pair<Fortran::common::TypeCategory, KindMapping::KindTy>
+mlirTypeToCategoryKind(mlir::Location loc, mlir::Type type) {
+  if (type.isF16())
+    return {Fortran::common::TypeCategory::Real, 2};
+  if (type.isBF16())
+    return {Fortran::common::TypeCategory::Real, 3};
+  if (type.isF32())
+    return {Fortran::common::TypeCategory::Real, 4};
+  if (type.isF64())
+    return {Fortran::common::TypeCategory::Real, 8};
+  if (type.isF80())
+    return {Fortran::common::TypeCategory::Real, 10};
+  if (type.isF128())
+    return {Fortran::common::TypeCategory::Real, 16};
+  if (type.isInteger(8))
+    return {Fortran::common::TypeCategory::Integer, 1};
+  if (type.isInteger(16))
+    return {Fortran::common::TypeCategory::Integer, 2};
+  if (type.isInteger(32))
+    return {Fortran::common::TypeCategory::Integer, 4};
+  if (type.isInteger(64))
+    return {Fortran::common::TypeCategory::Integer, 8};
+  if (type.isInteger(128))
+    return {Fortran::common::TypeCategory::Integer, 16};
+  if (type == fir::ComplexType::get(loc.getContext(), 2))
+    return {Fortran::common::TypeCategory::Complex, 2};
+  if (type == fir::ComplexType::get(loc.getContext(), 3))
+    return {Fortran::common::TypeCategory::Complex, 3};
+  if (type == fir::ComplexType::get(loc.getContext(), 4))
+    return {Fortran::common::TypeCategory::Complex, 4};
+  if (type == fir::ComplexType::get(loc.getContext(), 8))
+    return {Fortran::common::TypeCategory::Complex, 8};
+  if (type == fir::ComplexType::get(loc.getContext(), 10))
+    return {Fortran::common::TypeCategory::Complex, 10};
+  if (type == fir::ComplexType::get(loc.getContext(), 16))
+    return {Fortran::common::TypeCategory::Complex, 16};
+  if (type == fir::LogicalType::get(loc.getContext(), 1))
+    return {Fortran::common::TypeCategory::Logical, 1};
+  if (type == fir::LogicalType::get(loc.getContext(), 2))
+    return {Fortran::common::TypeCategory::Logical, 2};
+  if (type == fir::LogicalType::get(loc.getContext(), 4))
+    return {Fortran::common::TypeCategory::Logical, 4};
+  if (type == fir::LogicalType::get(loc.getContext(), 8))
+    return {Fortran::common::TypeCategory::Logical, 8};
+  fir::emitFatalError(loc,
+                      "unsupported type: " + fir::mlirTypeToString(type));
+}
+
 /// Find the fir.type_info that was created for this \p recordType in \p module,
 /// if any. \p  symbolTable can be provided to speed-up the lookup. This tool
 /// will match record type even if they have been "altered" in type conversion
diff --git a/flang/include/flang/Runtime/matmul-instances.inc b/flang/include/flang/Runtime/matmul-instances.inc
index 970b03339cd5e..32c6ab06d2521 100644
--- a/flang/include/flang/Runtime/matmul-instances.inc
+++ b/flang/include/flang/Runtime/matmul-instances.inc
@@ -17,6 +17,10 @@
 #error "Define MATMUL_DIRECT_INSTANCE before including this file"
 #endif
 
+#ifndef MATMUL_FORCE_ALL_TYPES
+#error "Define MATMUL_FORCE_ALL_TYPES to 0 or 1 before including this file"
+#endif
+
 // clang-format off
 
 #define FOREACH_MATMUL_TYPE_PAIR(macro)         \
@@ -88,7 +92,7 @@
 FOREACH_MATMUL_TYPE_PAIR(MATMUL_INSTANCE)
 FOREACH_MATMUL_TYPE_PAIR(MATMUL_DIRECT_INSTANCE)
 
-#if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
+#if MATMUL_FORCE_ALL_TYPES || (defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T)
 #define FOREACH_MATMUL_TYPE_PAIR_WITH_INT16(macro)      \
   macro(Integer, 16, Integer, 1)                        \
   macro(Integer, 16, Integer, 2)                        \
@@ -107,7 +111,7 @@ FOREACH_MATMUL_TYPE_PAIR(MATMUL_DIRECT_INSTANCE)
 FOREACH_MATMUL_TYPE_PAIR_WITH_INT16(MATMUL_INSTANCE)
 FOREACH_MATMUL_TYPE_PAIR_WITH_INT16(MATMUL_DIRECT_INSTANCE)
 
-#if LDBL_MANT_DIG == 64
+#if MATMUL_FORCE_ALL_TYPES || LDBL_MANT_DIG == 64
 MATMUL_INSTANCE(Integer, 16, Real, 10)
 MATMUL_INSTANCE(Integer, 16, Complex, 10)
 MATMUL_INSTANCE(Real, 10, Integer, 16)
@@ -117,7 +121,7 @@ MATMUL_DIRECT_INSTANCE(Integer, 16, Complex, 10)
 MATMUL_DIRECT_INSTANCE(Real, 10, Integer, 16)
 MATMUL_DIRECT_INSTANCE(Complex, 10, Integer, 16)
 #endif
-#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+#if MATMUL_FORCE_ALL_TYPES || (LDBL_MANT_DIG == 113 || HAS_FLOAT128)
 MATMUL_INSTANCE(Integer, 16, Real, 16)
 MATMUL_INSTANCE(Integer, 16, Complex, 16)
 MATMUL_INSTANCE(Real, 16, Integer, 16)
@@ -127,9 +131,9 @@ MATMUL_DIRECT_INSTANCE(Integer, 16, Complex, 16)
 MATMUL_DIRECT_INSTANCE(Real, 16, Integer, 16)
 MATMUL_DIRECT_INSTANCE(Complex, 16, Integer, 16)
 #endif
-#endif // defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
+#endif // MATMUL_FORCE_ALL_TYPES || (defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T)
 
-#if LDBL_MANT_DIG == 64
+#if MATMUL_FORCE_ALL_TYPES || LDBL_MANT_DIG == 64
 #define FOREACH_MATMUL_TYPE_PAIR_WITH_REAL10(macro)         \
   macro(Integer, 1, Real, 10)                               \
   macro(Integer, 1, Complex, 10)                            \
@@ -171,7 +175,7 @@ MATMUL_DIRECT_INSTANCE(Complex, 16, Integer, 16)
 FOREACH_MATMUL_TYPE_PAIR_WITH_REAL10(MATMUL_INSTANCE)
 FOREACH_MATMUL_TYPE_PAIR_WITH_REAL10(MATMUL_DIRECT_INSTANCE)
 
-#if HAS_FLOAT128
+#if MATMUL_FORCE_ALL_TYPES || HAS_FLOAT128
 MATMUL_INSTANCE(Real, 10, Real, 16)
 MATMUL_INSTANCE(Real, 10, Complex, 16)
 MATMUL_INSTANCE(Real, 16, Real, 10)
@@ -189,9 +193,9 @@ MATMUL_DIRECT_INSTANCE(Complex, 10, Complex, 16)
 MATMUL_DIRECT_INSTANCE(Complex, 16, Real, 10)
 MATMUL_DIRECT_INSTANCE(Complex, 16, Complex, 10)
 #endif
-#endif // LDBL_MANT_DIG == 64
+#endif // MATMUL_FORCE_ALL_TYPES || LDBL_MANT_DIG == 64
 
-#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+#if MATMUL_FORCE_ALL_TYPES || (LDBL_MANT_DIG == 113 || HAS_FLOAT128)
 #define FOREACH_MATMUL_TYPE_PAIR_WITH_REAL16(macro)         \
   macro(Integer, 1, Real, 16)                               \
   macro(Integer, 1, Complex, 16)                            \
@@ -232,7 +236,7 @@ MATMUL_DIRECT_INSTANCE(Complex, 16, Complex, 10)
 
 FOREACH_MATMUL_TYPE_PAIR_WITH_REAL16(MATMUL_INSTANCE)
 FOREACH_MATMUL_TYPE_PAIR_WITH_REAL16(MATMUL_DIRECT_INSTANCE)
-#endif // LDBL_MANT_DIG == 113 || HAS_FLOAT128
+#endif // MATMUL_FORCE_ALL_TYPES || (LDBL_MANT_DIG == 113 || HAS_FLOAT128)
 
 #define FOREACH_MATMUL_LOGICAL_TYPE_PAIR(macro) \
   macro(Logical, 1, Logical, 1)                 \
@@ -257,5 +261,6 @@ FOREACH_MATMUL_LOGICAL_TYPE_PAIR(MATMUL_DIRECT_INSTANCE)
 
 #undef MATMUL_INSTANCE
 #undef MATMUL_DIRECT_INSTANCE
+#undef MATMUL_FORCE_ALL_TYPES
 
 // clang-format on
diff --git a/flang/include/flang/Runtime/matmul-transpose.h b/flang/include/flang/Runtime/matmul-transpose.h
index d0a5005a1a8bd..2d79ca10e0895 100644
--- a/flang/include/flang/Runtime/matmul-transpose.h
+++ b/flang/include/flang/Runtime/matmul-transpose.h
@@ -40,6 +40,8 @@ void RTDECL(MatmulTransposeDirect)(const Descriptor &, const Descriptor &,
       Descriptor & result, const Descriptor &x, const Descriptor &y, \
       const char *sourceFile, int line);
 
+#define MATMUL_FORCE_ALL_TYPES 0
+
 #include "matmul-instances.inc"
 
 } // extern "C"
diff --git a/flang/include/flang/Runtime/matmul.h b/flang/include/flang/Runtime/matmul.h
index 1a5e39eb8813f..a72d4a06ee459 100644
--- a/flang/include/flang/Runtime/matmul.h
+++ b/flang/include/flang/Runtime/matmul.h
@@ -39,6 +39,8 @@ void RTDECL(MatmulDirect)(const Descriptor &, const Descriptor &,
       const Descriptor &x, const Descriptor &y, const char *sourceFile, \
       int line);
 
+#define MATMUL_FORCE_ALL_TYPES 0
+
 #include "matmul-instances.inc"
 
 } // extern "C"
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 8dd1904939f3e..a1cef7437fa2d 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -701,18 +701,19 @@ prettyPrintIntrinsicName(fir::FirOpBuilder &builder, mlir::Location loc,
   if (name == "pow") {
     assert(funcType.getNumInputs() == 2 && "power operator has two arguments");
     std::string displayName{" ** "};
-    sstream << numericMlirTypeToFortran(builder, funcType.getInput(0), loc,
-                                        displayName)
+    sstream << mlirTypeToIntrinsicFortran(builder, funcType.getInput(0), loc,
+                                          displayName)
             << displayName
-            << numericMlirTypeToFortran(builder, funcType.getInput(1), loc,
-                                        displayName);
+            << mlirTypeToIntrinsicFortran(builder, funcType.getInput(1), loc,
+                                          displayName);
   } else {
     sstream << name.upper() << "(";
     if (funcType.getNumInputs() > 0)
-      sstream << numericMlirTypeToFortran(builder, funcType.getInput(0), loc,
-                                          name);
+      sstream << mlirTypeToIntrinsicFortran(builder, funcType.getInput(0), loc,
+                                            name);
     for (mlir::Type argType : funcType.getInputs().drop_front()) {
-      sstream << ", " << numericMlirTypeToFortran(builder, argType, loc, name);
+      sstream << ", "
+              << mlirTypeToIntrinsicFortran(builder, argType, loc, name);
     }
     sstream << ")";
   }
diff --git a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
index 6d3d85e8df69f..8f08b01fe0097 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
@@ -329,11 +329,64 @@ void fir::runtime::genEoshiftVector(fir::FirOpBuilder &builder,
   builder.create<fir::CallOp>(loc, eoshiftFunc, args);
 }
 
+/// Common function type of the ForcedMatmul<ACAT><AKIND><BCAT><BKIND> models:
+/// (Descriptor &, const Descriptor &, const Descriptor &, const char *, int).
+struct ForcedMatmulTypeModel {
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      using Fortran::runtime::Descriptor;
+      auto resultTy = fir::runtime::getModel<Descriptor &>()(ctx);
+      auto operandTy = fir::runtime::getModel<const Descriptor &>()(ctx);
+      auto fileTy = fir::runtime::getModel<const char *>()(ctx);
+      auto lineTy = fir::runtime::getModel<int>()(ctx);
+      auto noneTy = fir::runtime::getModel<void>()(ctx);
+      return mlir::FunctionType::get(
+          ctx, {resultTy, operandTy, operandTy, fileTy, lineTy}, {noneTy});
+    };
+  }
+};
+
+#define MATMUL_INSTANCE(ACAT, AKIND, BCAT, BKIND)                              \
+  struct ForcedMatmul##ACAT##AKIND##BCAT##BKIND                                \
+      : public ForcedMatmulTypeModel {                                         \
+    static constexpr const char *name =                                        \
+        ExpandAndQuoteKey(RTNAME(Matmul##ACAT##AKIND##BCAT##BKIND));           \
+  };
+
+#define MATMUL_DIRECT_INSTANCE(ACAT, AKIND, BCAT, BKIND)
+#define MATMUL_FORCE_ALL_TYPES 1
+
+#include "flang/Runtime/matmul-instances.inc"
+
 /// Generate call to Matmul intrinsic runtime routine.
 void fir::runtime::genMatmul(fir::FirOpBuilder &builder, mlir::Location loc,
                              mlir::Value resultBox, mlir::Value matrixABox,
                              mlir::Value matrixBBox) {
-  auto func = fir::runtime::getRuntimeFunc<mkRTKey(Matmul)>(loc, builder);
+  mlir::func::FuncOp func;
+  auto boxATy = matrixABox.getType();
+  auto arrATy = fir::dyn_cast_ptrOrBoxEleTy(boxATy);
+  auto arrAEleTy = mlir::cast<fir::SequenceType>(arrATy).getEleTy();
+  auto [aCat, aKind] = fir::mlirTypeToCategoryKind(loc, arrAEleTy);
+  auto boxBTy = matrixBBox.getType();
+  auto arrBTy = fir::dyn_cast_ptrOrBoxEleTy(boxBTy);
+  auto arrBEleTy = mlir::cast<fir::SequenceType>(arrBTy).getEleTy();
+  auto [bCat, bKind] = fir::mlirTypeToCategoryKind(loc, arrBEleTy);
+
+#define MATMUL_INSTANCE(ACAT, AKIND, BCAT, BKIND)                              \
+  if (!func && aCat == TypeCategory::ACAT && aKind == AKIND &&                 \
+      bCat == TypeCategory::BCAT && bKind == BKIND) {                          \
+    func =                                                                     \
+        fir::runtime::getRuntimeFunc<ForcedMatmul##ACAT##AKIND##BCAT##BKIND>(  \
+            loc, builder);                                                     \
+  }
+
+#define MATMUL_DIRECT_INSTANCE(ACAT, AKIND, BCAT, BKIND)
+#define MATMUL_FORCE_ALL_TYPES 1
+#include "flang/Runtime/matmul-instances.inc"
+
+  if (!func) {
+    fir::intrinsicTypeTODO2(builder, arrAEleTy, arrBEleTy, loc, "MATMUL");
+  }
   auto fTy = func.getFunctionType();
   auto sourceFile = fir::factory::locationToFilename(builder, loc);
   auto sourceLine =
@@ -344,13 +397,48 @@ void fir::runtime::genMatmul(fir::FirOpBuilder &builder, mlir::Location loc,
   builder.create<fir::CallOp>(loc, func, args);
 }
 
-/// Generate call to MatmulTranspose intrinsic runtime routine.
+/// Define ForcedMatmulTranspose<ACAT><AKIND><BCAT><BKIND> models.
+#define MATMUL_INSTANCE(ACAT, AKIND, BCAT, BKIND)                              \
+  struct ForcedMatmulTranspose##ACAT##AKIND##BCAT##BKIND                       \
+      : public ForcedMatmulTypeModel {                                         \
+    static constexpr const char *name =                                        \
+        ExpandAndQuoteKey(RTNAME(MatmulTranspose##ACAT##AKIND##BCAT##BKIND));  \
+  };
+
+#define MATMUL_DIRECT_INSTANCE(ACAT, AKIND, BCAT, BKIND)
+#define MATMUL_FORCE_ALL_TYPES 1
+
+#include "flang/Runtime/matmul-instances.inc"
+
 void fir::runtime::genMatmulTranspose(fir::FirOpBuilder &builder,
                                       mlir::Location loc, mlir::Value resultBox,
                                       mlir::Value matrixABox,
                                       mlir::Value matrixBBox) {
-  auto func =
-      fir::runtime::getRuntimeFunc<mkRTKey(MatmulTranspose)>(loc, builder);
+  mlir::func::FuncOp func;
+  auto boxATy = matrixABox.getType();
+  auto arrATy = fir::dyn_cast_ptrOrBoxEleTy(boxATy);
+  auto arrAEleTy = mlir::cast<fir::SequenceType>(arrATy).getEleTy();
+  auto [aCat, aKind] = fir::mlirTypeToCategoryKind(loc, arrAEleTy);
+  auto boxBTy = matrixBBox.getType();
+  auto arrBTy = fir::dyn_cast_ptrOrBoxEleTy(boxBTy);
+  auto arrBEleTy = mlir::cast<fir::SequenceType>(arrBTy).getEleTy();
+  auto [bCat, bKind] = fir::mlirTypeToCategoryKind(loc, arrBEleTy);
+
+#define MATMUL_INSTANCE(ACAT, AKIND, BCAT, BKIND)                              \
+  if (!func && aCat == TypeCategory::ACAT && aKind == AKIND &&                 \
+      bCat == TypeCategory::BCAT && bKind == BKIND) {                          \
+    func = fir::runtime::getRuntimeFunc<                                       \
+        ForcedMatmulTranspose##ACAT##AKIND##BCAT##BKIND>(loc, builder);        \
+  }
+
+#define MATMUL_DIRECT_INSTANCE(ACAT, AKIND, BCAT, BKIND)
+#define MATMUL_FORCE_ALL_TYPES 1
+#include "flang/Runtime/matmul-instances.inc"
+
+  if (!func) {
+    fir::intrinsicTypeTODO2(builder, arrAEleTy, arrBEleTy, loc,
+                            "MATMUL-TRANSPOSE");
+  }
   auto fTy = func.getFunctionType();
   auto sourceFile = fir::factory::locationToFilename(builder, loc);
   auto sourceLine =
diff --git a/flang/runtime/matmul-transpose.cpp b/flang/runtime/matmul-transpose.cpp
index 1c998fa8cf6c1..283472650a1c6 100644
--- a/flang/runtime/matmul-transpose.cpp
+++ b/flang/runtime/matmul-transpose.cpp
@@ -343,48 +343,6 @@ inline static RT_API_ATTRS void DoMatmulTranspose(
 
 RT_DIAG_POP
 
-// Maps the dynamic type information from the arguments' descriptors
-// to the right instantiation of DoMatmul() for valid combinations of
-// types.
-template <bool IS_ALLOCATING> struct MatmulTranspose {
-  using ResultDescriptor =
-      std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor>;
-  template <TypeCategory XCAT, int XKIND> struct MM1 {
-    template <TypeCategory YCAT, int YKIND> struct MM2 {
-      RT_API_ATTRS void operator()(ResultDescriptor &result,
-          const Descriptor &x, const Descriptor &y,
-          Terminator &terminator) const {
-        if constexpr (constexpr auto resultType{
-                          GetResultType(XCAT, XKIND, YCAT, YKIND)}) {
-          if constexpr (Fortran::common::IsNumericTypeCategory(
-                            resultType->first) ||
-              resultType->first == TypeCategory::Logical) {
-            return DoMatmulTranspose<IS_ALLOCATING, resultType->first,
-                resultType->second, CppTypeFor<XCAT, XKIND>,
-                CppTypeFor<YCAT, YKIND>>(result, x, y, terminator);
-          }
-        }
-        terminator.Crash("MATMUL-TRANSPOSE: bad operand types (%d(%d), %d(%d))",
-            static_cast<int>(XCAT), XKIND, static_cast<int>(YCAT), YKIND);
-      }
-    };
-    RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
-        const Descriptor &y, Terminator &terminator, TypeCategory yCat,
-        int yKind) const {
-      ApplyType<MM2, void>(yCat, yKind, terminator, result, x, y, terminator);
-    }
-  };
-  RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
-      const Descriptor &y, const char *sourceFile, int line) const {
-    Terminator terminator{sourceFile, line};
-    auto xCatKind{x.type().GetCategoryAndKind()};
-    auto yCatKind{y.type().GetCategoryAndKind()};
-    RUNTIME_CHECK(terminator, xCatKind.has_value() && yCatKind.has_value());
-    ApplyType<MM1, void>(xCatKind->first, xCatKind->second, terminator, result,
-        x, y, terminator, yCatKind->first, yCatKind->second);
-  }
-};
-
 template <bool IS_ALLOCATING, TypeCategory XCAT, int XKIND, TypeCategory YCAT,
     int YKIND>
 struct MatmulTransposeHelper {
@@ -414,15 +372,6 @@ namespace Fortran::runtime {
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
-void RTDEF(MatmulTranspose)(Descriptor &result, const Descriptor &x,
-    const Descriptor &y, const char *sourceFile, int line) {
-  MatmulTranspose<true>{}(result, x, y, sourceFile, line);
-}
-void RTDEF(MatmulTransposeDirect)(const Descriptor &result, const Descriptor &x,
-    const Descriptor &y, const char *sourceFile, int line) {
-  MatmulTranspose<false>{}(result, x, y, sourceFile, line);
-}
-
 #define MATMUL_INSTANCE(XCAT, XKIND, YCAT, YKIND) \
   void RTDEF(MatmulTranspose##XCAT##XKIND##YCAT##YKIND)(Descriptor & result, \
       const Descriptor &x, const Descriptor &y, const char *sourceFile, \
@@ -439,6 +388,8 @@ void RTDEF(MatmulTransposeDirect)(const Descriptor &result, const Descriptor &x,
         TypeCategory::YCAT, YKIND>{}(result, x, y, sourceFile, line); \
   }
 
+#define MATMUL_FORCE_ALL_TYPES 0
+
 #include "flang/Runtime/matmul-instances.inc"
 
 RT_EXT_API_GROUP_END
diff --git a/flang/runtime/matmul.cpp b/flang/runtime/matmul.cpp
index 504d1aa4dc4a4..252557e2f9e7a 100644
--- a/flang/runtime/matmul.cpp
+++ b/flang/runtime/matmul.cpp
@@ -443,48 +443,6 @@ static inline RT_API_ATTRS void DoMatmul(
 
 RT_DIAG_POP
 
-// Maps the dynamic type information from the arguments' descriptors
-// to the right instantiation of DoMatmul() for valid combinations of
-// types.
-template <bool IS_ALLOCATING> struct Matmul {
-  using ResultDescriptor =
-      std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor>;
-  template <TypeCategory XCAT, int XKIND> struct MM1 {
-    template <TypeCategory YCAT, int YKIND> struct MM2 {
-      RT_API_ATTRS void operator()(ResultDescriptor &result,
-          const Descriptor &x, const Descriptor &y,
-          Terminator &terminator) const {
-        if constexpr (constexpr auto resultType{
-                          GetResultType(XCAT, XKIND, YCAT, YKIND)}) {
-          if constexpr (Fortran::common::IsNumericTypeCategory(
-                            resultType->first) ||
-              resultType->first == TypeCategory::Logical) {
-            return DoMatmul<IS_ALLOCATING, resultType->first,
-                resultType->second, CppTypeFor<XCAT, XKIND>,
-                CppTypeFor<YCAT, YKIND>>(result, x, y, terminator);
-          }
-        }
-        terminator.Crash("MATMUL: bad operand types (%d(%d), %d(%d))",
-            static_cast<int>(XCAT), XKIND, static_cast<int>(YCAT), YKIND);
-      }
-    };
-    RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
-        const Descriptor &y, Terminator &terminator, TypeCategory yCat,
-        int yKind) const {
-      ApplyType<MM2, void>(yCat, yKind, terminator, result, x, y, terminator);
-    }
-  };
-  RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
-      const Descriptor &y, const char *sourceFile, int line) const {
-    Terminator terminator{sourceFile, line};
-    auto xCatKind{x.type().GetCategoryAndKind()};
-    auto yCatKind{y.type().GetCategoryAndKind()};
-    RUNTIME_CHECK(terminator, xCatKind.has_value() && yCatKind.has_value());
-    ApplyType<MM1, void>(xCatKind->first, xCatKind->second, terminator, result,
-        x, y, terminator, yCatKind->first, yCatKind->second);
-  }
-};
-
 template <bool IS_ALLOCATING, TypeCategory XCAT, int XKIND, TypeCategory YCAT,
     int YKIND>
 struct MatmulHelper {
@@ -514,15 +472,6 @@ namespace Fortran::runtime {
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
-void RTDEF(Matmul)(Descriptor &result, const Descriptor &x, const Descriptor &y,
-    const char *sourceFile, int line) {
-  Matmul<true>{}(result, x, y, sourceFile, line);
-}
-void RTDEF(MatmulDirect)(const Descriptor &result, const Descriptor &x,
-    const Descriptor &y, const char *sourceFile, int line) {
-  Matmul<false>{}(result, x, y, sourceFile, line);
-}
-
 #define MATMUL_INSTANCE(XCAT, XKIND, YCAT, YKIND) \
   void RTDEF(Matmul##XCAT##XKIND##YCAT##YKIND)(Descriptor & result, \
       const Descriptor &x, const Descriptor &y, const char *sourceFile, \
@@ -539,6 +488,8 @@ void RTDEF(MatmulDirect)(const Descriptor &result, const Descriptor &x,
         YKIND>{}(result, x, y, sourceFile, line); \
   }
 
+#define MATMUL_FORCE_ALL_TYPES 0
+
 #include "flang/Runtime/matmul-instances.inc"
 
 RT_EXT_API_GROUP_END
diff --git a/flang/test/HLFIR/matmul-lowering.fir b/flang/test/HLFIR/matmul-lowering.fir
index 85a73dd45160f..fd76db2659516 100644
--- a/flang/test/HLFIR/matmul-lowering.fir
+++ b/flang/test/HLFIR/matmul-lowering.fir
@@ -29,7 +29,7 @@ func.func @_QPmatmul1(%arg0: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "lh
 // CHECK:         %[[RET_ARG:.*]] = fir.convert %[[RET_BOX]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> !fir.ref<!fir.box<none>>
 // CHECK-DAG:     %[[LHS_ARG:.*]] = fir.convert %[[LHS_VAR]]#1 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
 // CHECK-DAG:     %[[RHS_ARG:.*]] = fir.convert %[[RHS_VAR]]#1 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
-// CHECK:         %[[NONE:.*]] = fir.call @_FortranAMatmul(%[[RET_ARG]], %[[LHS_ARG]], %[[RHS_ARG]], %[[LOC_STR:.*]], %[[LOC_N:.*]]) fastmath<contract>
+// CHECK:         %[[NONE:.*]] = fir.call @_FortranAMatmulInteger4Integer4(%[[RET_ARG]], %[[LHS_ARG]], %[[RHS_ARG]], %[[LOC_STR:.*]], %[[LOC_N:.*]]) fastmath<contract>
 
 // CHECK:         %[[RET:.*]] = fir.load %[[RET_BOX]]
 // CHECK-DAG:     %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[RET]]
@@ -71,7 +71,7 @@ func.func @_QPtest(%arg0: !fir.ref<!fir.array<3x3xf32>> {fir.bindc_name = "a"},
 }
 // just check that we apply the patterns successfully. The details are checked above
 // CHECK-LABEL: func.func @_QPtest(
-// CHECK:         fir.call @_FortranAMatmul({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
-// CHECK:         fir.call @_FortranAMatmul({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+// CHECK:         fir.call @_FortranAMatmulReal4Real4({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+// CHECK:         fir.call @_FortranAMatmulReal4Real4({{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
 // CHECK:         return
 // CHECK-NEXT:  }
diff --git a/flang/test/HLFIR/mul_transpose.f90 b/flang/test/HLFIR/mul_transpose.f90
index 378ecfe4886aa..7cfbfe39d0ea8 100644
--- a/flang/test/HLFIR/mul_transpose.f90
+++ b/flang/test/HLFIR/mul_transpose.f90
@@ -44,7 +44,7 @@ subroutine mul_transpose(a, b, res)
 ! CHECK-LOWERING:       %[[MUL_CONV_RES:.*]] = fir.convert %[[MUL_RES_BOX:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK-LOWERING:       %[[LHS_CONV:.*]] = fir.convert %[[LHS_BOX]] : (!fir.box<!fir.array<1x2xf32>>) -> !fir.box<none>
 ! CHECK-LOWERING:       %[[B_BOX_CONV:.*]] = fir.convert %[[B_BOX]] : (!fir.box<!fir.array<2x2xf32>>) -> !fir.box<none>
-! CHECK-LOWERING:       fir.call @_FortranAMatmul(%[[MUL_CONV_RES]], %[[LHS_CONV]], %[[B_BOX_CONV]], %[[LOC_STR2:.*]], %[[LOC_N2:.*]])
+! CHECK-LOWERING:       fir.call @_FortranAMatmulReal4Real4(%[[MUL_CONV_RES]], %[[LHS_CONV]], %[[B_BOX_CONV]], %[[LOC_STR2:.*]], %[[LOC_N2:.*]])
 ! CHECK-LOWERING:       %[[MUL_RES_LD:.*]] = fir.load %[[MUL_RES_BOX:.*]]
 ! CHECK-LOWERING:       %[[MUL_RES_ADDR:.*]] = fir.box_addr %[[MUL_RES_LD]]
 ! CHECK-LOWERING:       %[[MUL_RES_VAR:.*]]:2 = hlfir.declare %[[MUL_RES_ADDR]]({{.*}}) {uniq_name = ".tmp.intrinsic_result"}
@@ -60,7 +60,7 @@ subroutine mul_transpose(a, b, res)
 ! CHECK-LOWERING-OPT:   %[[MUL_CONV_RES:.*]] = fir.convert %[[MUL_RES_BOX:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK-LOWERING-OPT:   %[[LHS_CONV:.*]] = fir.convert %[[LHS_BOX]] : (!fir.box<!fir.array<2x1xf32>>) -> !fir.box<none>
 ! CHECK-LOWERING-OPT:   %[[B_BOX_CONV:.*]] = fir.convert %[[B_BOX]] : (!fir.box<!fir.array<2x2xf32>>) -> !fir.box<none>
-! CHECK-LOWERING-OPT:   fir.call @_FortranAMatmulTranspose(%[[MUL_CONV_RES]], %[[LHS_CONV]], %[[B_BOX_CONV]], %[[LOC_STR2:.*]], %[[LOC_N2:.*]])
+! CHECK-LOWERING-OPT:   fir.call @_FortranAMatmulTransposeReal4Real4(%[[MUL_CONV_RES]], %[[LHS_CONV]], %[[B_BOX_CONV]], %[[LOC_STR2:.*]], %[[LOC_N2:.*]])
 ! CHECK-LOWERING-OPT:   %[[MUL_RES_LD:.*]] = fir.load %[[MUL_RES_BOX:.*]]
 ! CHECK-LOWERING-OPT:   %[[MUL_RES_ADDR:.*]] = fir.box_addr %[[MUL_RES_LD]]
 ! CHECK-LOWERING-OPT:   %[[MUL_RES_VAR:.*]]:2 = hlfir.declare %[[MUL_RES_ADDR]]({{.*}}) {uniq_name = ".tmp.intrinsic_result"}
@@ -81,7 +81,7 @@ subroutine mul_transpose(a, b, res)
 ! CHECK-BUFFERING:      %[[TRANSPOSE_RES_BOX:.*]] = fir.embox %[[TRANSPOSE_RES_REF]]({{.*}})
 ! CHECK-BUFFERING:      %[[LHS_CONV:.*]] = fir.convert %[[TRANSPOSE_RES_BOX]] : (!fir.box<!fir.array<1x2xf32>>) -> !fir.box<none>
 ! [argument handling unchanged]
-! CHECK-BUFFERING:      fir.call @_FortranAMatmul(
+! CHECK-BUFFERING:      fir.call @_FortranAMatmulReal4Real4(
 ! CHECK-BUFFERING:      %[[MUL_RES_LD:.*]] = fir.load %[[MUL_RES_BOX:.*]]
 ! CHECK-BUFFERING:      %[[MUL_RES_ADDR:.*]] = fir.box_addr %[[MUL_RES_LD]]
 ! CHECK-BUFFERING:      %[[MUL_RES_VAR:.*]]:2 = hlfir.declare %[[MUL_RES_ADDR]]({{.*}}) {uniq_name = ".tmp.intrinsic_result"}
diff --git a/flang/test/Lower/Intrinsics/matmul.f90 b/flang/test/Lower/Intrinsics/matmul.f90
index e9a8220dc6ab7..db60963320144 100644
--- a/flang/test/Lower/Intrinsics/matmul.f90
+++ b/flang/test/Lower/Intrinsics/matmul.f90
@@ -23,7 +23,7 @@
 ! CHECK:  %[[RESULT_BOX_ADDR_RUNTIME:.*]] = fir.convert %[[RESULT_BOX_ADDR]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK:  %[[X_BOX_RUNTIME:.*]] = fir.convert %[[X_BOX]] : (!fir.box<!fir.array<3x1xf32>>) -> !fir.box<none>
 ! CHECK:  %[[Y_BOX_RUNTIME:.*]] = fir.convert %[[Y_BOX]] : (!fir.box<!fir.array<1x3xf32>>) -> !fir.box<none>
-! CHECK:  {{.*}}fir.call @_FortranAMatmul(%[[RESULT_BOX_ADDR_RUNTIME]], %[[X_BOX_RUNTIME]], %[[Y_BOX_RUNTIME]], {{.*}}, {{.*}} {{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK:  {{.*}}fir.call @_FortranAMatmulReal4Real4(%[[RESULT_BOX_ADDR_RUNTIME]], %[[X_BOX_RUNTIME]], %[[Y_BOX_RUNTIME]], {{.*}}, {{.*}} {{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
 ! CHECK:  %[[RESULT_BOX:.*]] = fir.load %[[RESULT_BOX_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
 ! CHECK:  %[[RESULT_TMP:.*]] = fir.box_addr %[[RESULT_BOX]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
 ! CHECK:  %[[Z_COPY_FROM_RESULT:.*]] = fir.do_loop
@@ -50,7 +50,7 @@ subroutine matmul_test(x,y,z)
 !CHECK:  %[[RESULT_BOX_RUNTIME:.*]] = fir.convert %[[RESULT_BOX_ADDR]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>) -> !fir.ref<!fir.box<none>>
 !CHECK:  %[[X_BOX_RUNTIME:.*]] = fir.convert %[[X_BOX]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.box<none>
 !CHECK:  %[[Y_BOX_RUNTIME:.*]] = fir.convert %[[Y_BOX]] : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> !fir.box<none>
-!CHECK:  {{.*}}fir.call @_FortranAMatmul(%[[RESULT_BOX_RUNTIME]], %[[X_BOX_RUNTIME]], %[[Y_BOX_RUNTIME]], {{.*}}, {{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+!CHECK:  {{.*}}fir.call @_FortranAMatmulLogical4Logical4(%[[RESULT_BOX_RUNTIME]], %[[X_BOX_RUNTIME]], %[[Y_BOX_RUNTIME]], {{.*}}, {{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
 !CHECK:  %[[RESULT_BOX:.*]] = fir.load %[[RESULT_BOX_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
 !CHECK:  %[[RESULT_TMP:.*]] = fir.box_addr %[[RESULT_BOX]] : (!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>) -> !fir.heap<!fir.array<?x!fir.logical<4>>>
 !CHECK:  %[[Z_COPY_FROM_RESULT:.*]] = fir.do_loop
diff --git a/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h b/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h
index 00c27f4bb2142..4ace359f055b0 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h
+++ b/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h
@@ -58,6 +58,11 @@ struct RuntimeCallTest : public testing::Test {
     char1Ty = fir::CharacterType::getSingleton(builder.getContext(), 1);
     char2Ty = fir::CharacterType::getSingleton(builder.getContext(), 2);
     char4Ty = fir::CharacterType::getSingleton(builder.getContext(), 4);
+
+    logical1Ty = fir::LogicalType::get(builder.getContext(), 1);
+    logical2Ty = fir::LogicalType::get(builder.getContext(), 2);
+    logical4Ty = fir::LogicalType::get(builder.getContext(), 4);
+    logical8Ty = fir::LogicalType::get(builder.getContext(), 8);
   }
 
   mlir::MLIRContext context;
@@ -84,6 +89,10 @@ struct RuntimeCallTest : public testing::Test {
   mlir::Type char1Ty;
   mlir::Type char2Ty;
   mlir::Type char4Ty;
+  mlir::Type logical1Ty;
+  mlir::Type logical2Ty;
+  mlir::Type logical4Ty;
+  mlir::Type logical8Ty;
 };
 
 /// Check that the \p op is a `fir::CallOp` operation and its name matches
diff --git a/flang/unittests/Optimizer/Builder/Runtime/TransformationalTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/TransformationalTest.cpp
index d5884ae3febbb..28266bb90400b 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/TransformationalTest.cpp
+++ b/flang/unittests/Optimizer/Builder/Runtime/TransformationalTest.cpp
@@ -144,15 +144,41 @@ TEST_F(RuntimeCallTest, genEoshiftVectorTest) {
   checkCallOpFromResultBox(result, "_FortranAEoshiftVector", 4);
 }
 
+void testGenMatmul(fir::FirOpBuilder &builder, mlir::Type eleTy1,
+    mlir::Type eleTy2, llvm::StringRef funcName) {
+  auto loc = builder.getUnknownLoc();
+  mlir::Type resultTy =
+      fir::ReferenceType::get(fir::BoxType::get(builder.getNoneType()));
+  mlir::Type seqTy1 =
+      fir::SequenceType::get(fir::SequenceType::Shape(2, 10), eleTy1);
+  mlir::Type seqTy2 =
+      fir::SequenceType::get(fir::SequenceType::Shape(2, 10), eleTy2);
+  mlir::Type boxTy1 = fir::BoxType::get(seqTy1);
+  mlir::Type boxTy2 = fir::BoxType::get(seqTy2);
+  mlir::Value result = builder.create<fir::UndefOp>(loc, resultTy);
+  mlir::Value matrixA = builder.create<fir::UndefOp>(loc, boxTy1);
+  mlir::Value matrixB = builder.create<fir::UndefOp>(loc, boxTy2);
+  fir::runtime::genMatmul(builder, loc, result, matrixA, matrixB);
+  checkCallOpFromResultBox(result, funcName, 3);
+}
+
 TEST_F(RuntimeCallTest, genMatmulTest) {
-  auto loc = firBuilder->getUnknownLoc();
-  mlir::Type seqTy =
-      fir::SequenceType::get(fir::SequenceType::Shape(1, 10), i32Ty);
-  mlir::Value result = firBuilder->create<fir::UndefOp>(loc, seqTy);
-  mlir::Value matrixA = firBuilder->create<fir::UndefOp>(loc, seqTy);
-  mlir::Value matrixB = firBuilder->create<fir::UndefOp>(loc, seqTy);
-  fir::runtime::genMatmul(*firBuilder, loc, matrixA, matrixB, result);
-  checkCallOpFromResultBox(result, "_FortranAMatmul", 3);
+  testGenMatmul(*firBuilder, i32Ty, i16Ty, "_FortranAMatmulInteger4Integer2");
+  testGenMatmul(*firBuilder, i32Ty, f64Ty, "_FortranAMatmulInteger4Real8");
+  testGenMatmul(*firBuilder, i32Ty, c8Ty, "_FortranAMatmulInteger4Complex8");
+  testGenMatmul(*firBuilder, f32Ty, i16Ty, "_FortranAMatmulReal4Integer2");
+  testGenMatmul(*firBuilder, f32Ty, f64Ty, "_FortranAMatmulReal4Real8");
+  testGenMatmul(*firBuilder, f32Ty, c8Ty, "_FortranAMatmulReal4Complex8");
+  testGenMatmul(*firBuilder, c4Ty, i16Ty, "_FortranAMatmulComplex4Integer2");
+  testGenMatmul(*firBuilder, c4Ty, f64Ty, "_FortranAMatmulComplex4Real8");
+  testGenMatmul(*firBuilder, c4Ty, c8Ty, "_FortranAMatmulComplex4Complex8");
+  testGenMatmul(*firBuilder, f80Ty, f128Ty, "_FortranAMatmulReal10Real16");
+  testGenMatmul(*firBuilder, f80Ty, i128Ty, "_FortranAMatmulReal10Integer16");
+  testGenMatmul(*firBuilder, f128Ty, i128Ty, "_FortranAMatmulReal16Integer16");
+  testGenMatmul(
+      *firBuilder, logical1Ty, logical2Ty, "_FortranAMatmulLogical1Logical2");
+  testGenMatmul(
+      *firBuilder, logical4Ty, logical8Ty, "_FortranAMatmulLogical4Logical8");
 }
 
 TEST_F(RuntimeCallTest, genPackTest) {
diff --git a/flang/unittests/Runtime/Matmul.cpp b/flang/unittests/Runtime/Matmul.cpp
index 226dbc5ae9eeb..c3fed9b972df2 100644
--- a/flang/unittests/Runtime/Matmul.cpp
+++ b/flang/unittests/Runtime/Matmul.cpp
@@ -40,29 +40,6 @@ TEST(Matmul, Basic) {
   StaticDescriptor<2, true> statDesc;
   Descriptor &result{statDesc.descriptor()};
 
-  RTNAME(Matmul)(result, *x, *y, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-
-  std::memset(
-      result.raw().base_addr, 0, result.Elements() * result.ElementBytes());
-  result.GetDimension(0).SetLowerBound(0);
-  result.GetDimension(1).SetLowerBound(2);
-  RTNAME(MatmulDirect)(result, *x, *y, __FILE__, __LINE__);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulInteger4Integer2)(result, *x, *y, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -86,16 +63,6 @@ TEST(Matmul, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(Matmul)(result, *v, *x, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 1);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 3);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 8}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(0), -2);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(1), -8);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -14);
-  result.Destroy();
-
   RTNAME(MatmulInteger8Integer4)(result, *v, *x, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 1);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -106,16 +73,6 @@ TEST(Matmul, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -14);
   result.Destroy();
 
-  RTNAME(Matmul)(result, *y, *v, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 1);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 3);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 8}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(0), -24);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(1), -27);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -30);
-  result.Destroy();
-
   RTNAME(MatmulInteger2Integer8)(result, *y, *v, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 1);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -159,19 +116,6 @@ TEST(Matmul, Basic) {
       /*uppers=*/nullptr, /*strides=*/nullptr)};
   ASSERT_EQ(errorY2, 0) << "CFI_section failed for Y2: " << errorY2;
 
-  RTNAME(Matmul)(result, sectionX2, *y, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulInteger4Integer2)(result, sectionX2, *y, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -185,19 +129,6 @@ TEST(Matmul, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(Matmul)(result, *x, sectionY2, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulInteger4Integer2)(result, *x, sectionY2, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -211,19 +142,6 @@ TEST(Matmul, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(Matmul)(result, sectionX2, sectionY2, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulInteger4Integer2)
   (result, sectionX2, sectionY2, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
@@ -238,16 +156,6 @@ TEST(Matmul, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(Matmul)(result, *v, sectionX2, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 1);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 3);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 8}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(0), -2);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(1), -8);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -14);
-  result.Destroy();
-
   RTNAME(MatmulInteger8Integer4)(result, *v, sectionX2, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 1);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -258,16 +166,6 @@ TEST(Matmul, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -14);
   result.Destroy();
 
-  RTNAME(Matmul)(result, sectionY2, *v, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 1);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 3);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 8}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(0), -24);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(1), -27);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -30);
-  result.Destroy();
-
   RTNAME(MatmulInteger2Integer8)(result, sectionY2, *v, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 1);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -285,23 +183,6 @@ TEST(Matmul, Basic) {
       std::vector<std::uint8_t>{false, false, false, true, true, false})};
   auto yLog{MakeArray<TypeCategory::Logical, 2>(std::vector<int>{3, 2},
       std::vector<std::uint16_t>{false, false, false, true, true, false})};
-  RTNAME(Matmul)(result, *xLog, *yLog, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Logical, 2}));
-  EXPECT_FALSE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(0)));
-  EXPECT_FALSE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(1)));
-  EXPECT_FALSE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(2)));
-  EXPECT_TRUE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(3)));
-  result.Destroy();
-
   RTNAME(MatmulLogical1Logical2)(result, *xLog, *yLog, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
diff --git a/flang/unittests/Runtime/MatmulTranspose.cpp b/flang/unittests/Runtime/MatmulTranspose.cpp
index 391c2e1b144ea..c582e945dc7c9 100644
--- a/flang/unittests/Runtime/MatmulTranspose.cpp
+++ b/flang/unittests/Runtime/MatmulTranspose.cpp
@@ -46,29 +46,6 @@ TEST(MatmulTranspose, Basic) {
   StaticDescriptor<2, true> statDesc;
   Descriptor &result{statDesc.descriptor()};
 
-  RTNAME(MatmulTranspose)(result, *x, *y, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-
-  std::memset(
-      result.raw().base_addr, 0, result.Elements() * result.ElementBytes());
-  result.GetDimension(0).SetLowerBound(0);
-  result.GetDimension(1).SetLowerBound(2);
-  RTNAME(MatmulTransposeDirect)(result, *x, *y, __FILE__, __LINE__);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulTransposeInteger4Integer2)(result, *x, *y, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -93,16 +70,6 @@ TEST(MatmulTranspose, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(MatmulTranspose)(result, *z, *v, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 1);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 3);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 8}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(0), -24);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(1), -27);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -30);
-  result.Destroy();
-
   RTNAME(MatmulTransposeInteger2Integer8)(result, *z, *v, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 1);
   EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -113,27 +80,6 @@ TEST(MatmulTranspose, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -30);
   result.Destroy();
 
-  RTNAME(MatmulTranspose)(result, *m, *z, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  ASSERT_EQ(result.GetDimension(0).LowerBound(), 1);
-  ASSERT_EQ(result.GetDimension(0).UpperBound(), 4);
-  ASSERT_EQ(result.GetDimension(1).LowerBound(), 1);
-  ASSERT_EQ(result.GetDimension(1).UpperBound(), 3);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 2}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(0), 0);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(1), 9);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(2), 6);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(3), 15);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(4), 0);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(5), 10);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(6), 7);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(7), 17);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(8), 0);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(9), 11);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(10), 8);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int16_t>(11), 19);
-  result.Destroy();
-
   RTNAME(MatmulTransposeInteger2Integer2)(result, *m, *z, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
   ASSERT_EQ(result.GetDimension(0).LowerBound(), 1);
@@ -204,19 +150,6 @@ TEST(MatmulTranspose, Basic) {
       &sectionZ2.raw(), &z2->raw(), lowersZ2, uppersZ2, /*strides=*/nullptr)};
   ASSERT_EQ(errorZ2, 0) << "CFI_section failed for Z2: " << errorZ2;
 
-  RTNAME(MatmulTranspose)(result, sectionX2, *y, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulTransposeInteger4Integer2)
   (result, sectionX2, *y, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
@@ -231,19 +164,6 @@ TEST(MatmulTranspose, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(MatmulTranspose)(result, *x, sectionY2, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulTransposeInteger4Integer2)
   (result, *x, sectionY2, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
@@ -258,19 +178,6 @@ TEST(MatmulTranspose, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(MatmulTranspose)(result, sectionX2, sectionY2, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 4}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(0), 46);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(1), 67);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(2), 64);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
-  result.Destroy();
-
   RTNAME(MatmulTransposeInteger4Integer2)
   (result, sectionX2, sectionY2, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
@@ -285,16 +192,6 @@ TEST(MatmulTranspose, Basic) {
   EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int32_t>(3), 94);
   result.Destroy();
 
-  RTNAME(MatmulTranspose)(result, sectionZ2, *v, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 1);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 3);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Integer, 8}));
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(0), -24);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(1), -27);
-  EXPECT_EQ(*result.ZeroBasedIndexedElement<std::int64_t>(2), -30);
-  result.Destroy();
-
   RTNAME(MatmulTransposeInteger2Integer8)
   (result, sectionZ2, *v, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 1);
@@ -315,23 +212,6 @@ TEST(MatmulTranspose, Basic) {
       std::vector<std::uint16_t>{false, false, false, true, true, false})};
   auto vLog{MakeArray<TypeCategory::Logical, 1>(
       std::vector<int>{3}, std::vector<std::uint8_t>{true, false, true})};
-  RTNAME(MatmulTranspose)(result, *xLog, *yLog, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 2);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Logical, 2}));
-  EXPECT_FALSE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(0)));
-  EXPECT_FALSE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(1)));
-  EXPECT_TRUE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(2)));
-  EXPECT_FALSE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(3)));
-  result.Destroy();
-
   RTNAME(MatmulTransposeLogical1Logical2)
   (result, *xLog, *yLog, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 2);
@@ -350,17 +230,6 @@ TEST(MatmulTranspose, Basic) {
       static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(3)));
   result.Destroy();
 
-  RTNAME(MatmulTranspose)(result, *yLog, *vLog, __FILE__, __LINE__);
-  ASSERT_EQ(result.rank(), 1);
-  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
-  EXPECT_EQ(result.GetDimension(0).Extent(), 2);
-  ASSERT_EQ(result.type(), (TypeCode{TypeCategory::Logical, 2}));
-  EXPECT_FALSE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(0)));
-  EXPECT_TRUE(
-      static_cast<bool>(*result.ZeroBasedIndexedElement<std::uint16_t>(1)));
-  result.Destroy();
-
   RTNAME(MatmulTransposeLogical2Logical1)
   (result, *yLog, *vLog, __FILE__, __LINE__);
   ASSERT_EQ(result.rank(), 1);



More information about the flang-commits mailing list