[flang-commits] [flang] e525245 - [Flang] Lower the transpose intrinsic

Thu Mar 17 06:16:21 PDT 2022

Author: Kiran Chandramohan
Date: 2022-03-17T13:15:11Z
New Revision: e525245fba629632db4d026f2cac29f032f02320

URL: https://github.com/llvm/llvm-project/commit/e525245fba629632db4d026f2cac29f032f02320
DIFF: https://github.com/llvm/llvm-project/commit/e525245fba629632db4d026f2cac29f032f02320.diff

LOG: [Flang] Lower the transpose intrinsic

Tranpose intrinsic performs the transpose matrix operation for arrays
of rank 2. The intrinsic is lowered to a runtime call.

This is part of the upstreaming effort from the fir-dev branch in [1].
[1] https://github.com/flang-compiler/f18-llvm-project

Reviewed By: clementval

Differential Revision: https://reviews.llvm.org/D121895

Co-authored-by: Valentin Clement <clementval at gmail.com>
Co-authored-by: Jean Perier <jperier at nvidia.com>

Added: 
    flang/test/Lower/Intrinsics/transpose.f90

Modified: 
    flang/lib/Lower/IntrinsicCall.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/IntrinsicCall.cpp b/flang/lib/Lower/IntrinsicCall.cpp
index d4fbf554c2de6..669878cfab1d0 100644

--- a/flang/lib/Lower/IntrinsicCall.cpp
+++ b/flang/lib/Lower/IntrinsicCall.cpp
@@ -505,6 +505,8 @@ struct IntrinsicLibrary {
   void genSystemClock(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genTransfer(mlir::Type,
                                  llvm::ArrayRef<fir::ExtendedValue>);
+  fir::ExtendedValue genTranspose(mlir::Type,
+                                  llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genTrim(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genUbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
@@ -805,6 +807,10 @@ static constexpr IntrinsicHandler handlers[]{
      &I::genTransfer,
      {{{"source", asAddr}, {"mold", asAddr}, {"size", asValue}}},
      /*isElemental=*/false},
+    {"transpose",
+     &I::genTranspose,
+     {{{"matrix", asAddr}}},
+     /*isElemental=*/false},
     {"trim", &I::genTrim, {{{"string", asAddr}}}, /*isElemental=*/false},
     {"ubound",
      &I::genUbound,
@@ -3063,6 +3069,30 @@ IntrinsicLibrary::genUbound(mlir::Type resultType,
   return mlir::Value();
 }
 
+// TRANSPOSE
+fir::ExtendedValue
+IntrinsicLibrary::genTranspose(mlir::Type resultType,
+                               llvm::ArrayRef<fir::ExtendedValue> args) {
+
+  assert(args.size() == 1);
+
+  // Handle source argument
+  mlir::Value source = builder.createBox(loc, args[0]);
+
+  // Create mutable fir.box to be passed to the runtime for the result.
+  mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, 2);
+  fir::MutableBoxValue resultMutableBox =
+      fir::factory::createTempMutableBox(builder, loc, resultArrayType);
+  mlir::Value resultIrBox =
+      fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+  // Call runtime. The runtime is allocating the result.
+  fir::runtime::genTranspose(builder, loc, resultIrBox, source);
+  // Read result from mutable fir.box and add it to the list of temps to be
+  // finalized by the StatementContext.
+  return readAndAddCleanUp(resultMutableBox, resultType,
+                           "unexpected result for TRANSPOSE");
+}
+
 // TRIM
 fir::ExtendedValue
 IntrinsicLibrary::genTrim(mlir::Type resultType,
@@ -3080,6 +3110,7 @@ IntrinsicLibrary::genTrim(mlir::Type resultType,
   // finalized by the StatementContext.
   return readAndAddCleanUp(resultMutableBox, resultType, "TRIM");
 }
+
 // UNPACK
 fir::ExtendedValue
 IntrinsicLibrary::genUnpack(mlir::Type resultType,

diff  --git a/flang/test/Lower/Intrinsics/transpose.f90 b/flang/test/Lower/Intrinsics/transpose.f90
new file mode 100644
index 0000000000000..62bea39648988
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/transpose.f90
@@ -0,0 +1,24 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPtranspose_test(
+! CHECK-SAME: %[[source:.*]]: !fir.ref<!fir.array<2x3xf32>>{{.*}}) {
+subroutine transpose_test(mat)
+! CHECK:  %[[resultDescr:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xf32>>>
+   real :: mat(2,3)
+   call bar_transpose_test(transpose(mat))
+! CHECK:  %[[sourceBox:.*]] = fir.embox %[[source]]({{.*}}) : (!fir.ref<!fir.array<2x3xf32>>, !fir.shape<2>) -> !fir.box<!fir.array<2x3xf32>>
+! CHECK:  %[[zeroArray:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xf32>
+! CHECK:  %[[c0:.*]] = arith.constant 0 : index
+! CHECK:  %[[shapeResult:.*]] = fir.shape %[[c0]], %[[c0]] : (index, index) -> !fir.shape<2>
+! CHECK:  %[[resultBox:.*]] = fir.embox %[[zeroArray]](%[[shapeResult]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xf32>>>
+! CHECK:  fir.store %[[resultBox]] to %[[resultDescr]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+! CHECK:  %[[resultOpaque:.*]] = fir.convert %[[resultDescr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK:  %[[sourceOpaque:.*]] = fir.convert %[[sourceBox]] : (!fir.box<!fir.array<2x3xf32>>) -> !fir.box<none>
+! CHECK:  %{{.*}} = fir.call @_FortranATranspose(%[[resultOpaque]], %[[sourceOpaque]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK:  %[[tmp1:.*]] = fir.load %[[resultDescr]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+! CHECK:  %[[tmp2:.*]] = fir.box_addr %[[tmp1]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
+! CHECK:  %[[tmp3:.*]] = fir.convert %[[tmp2]] : (!fir.heap<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<3x2xf32>>
+! CHECK:  fir.call @_QPbar_transpose_test(%[[tmp3]]) : (!fir.ref<!fir.array<3x2xf32>>) -> ()
+! CHECK:  fir.freemem %[[tmp2]] : <!fir.array<?x?xf32>
+end subroutine
+