[flang-commits] [flang] c983aed - [fir] Add character utility functions in FIRBuilder

Valentin Clement via flang-commits flang-commits at lists.llvm.org
Tue Oct 19 13:35:31 PDT 2021


Author: Valentin Clement
Date: 2021-10-19T22:34:21+02:00
New Revision: c983aeddcf5af992d2a807d3f4f8cdc27cbf63b1

URL: https://github.com/llvm/llvm-project/commit/c983aeddcf5af992d2a807d3f4f8cdc27cbf63b1
DIFF: https://github.com/llvm/llvm-project/commit/c983aeddcf5af992d2a807d3f4f8cdc27cbf63b1.diff

LOG: [fir] Add character utility functions in FIRBuilder

Extract part of D111337 in order to mke it smaller
and easier to review. This patch add some utility
functions to the FIRBuilder.

Add the following utility functions:
- getCharacterLengthType
- createStringLiteral
- locationToFilename
- characterWithDynamicLen
- sequenceWithNonConstantShape
- hasDynamicSize

These bring up the BoxValue implementation together with it.

This patch is part of the upstreaming effort from fir-dev branch.

Reviewed By: AlexisPerry

Differential Revision: https://reviews.llvm.org/D112074

Co-authored-by: Jean Perier <jperier at nvidia.com>
Co-authored-by: Eric Schweitz <eschweitz at nvidia.com>

Added: 
    flang/include/flang/Optimizer/Builder/BoxValue.h
    flang/include/flang/Optimizer/Support/Matcher.h
    flang/lib/Optimizer/Builder/BoxValue.cpp

Modified: 
    flang/include/flang/Optimizer/Builder/FIRBuilder.h
    flang/include/flang/Optimizer/Dialect/FIRType.h
    flang/lib/Optimizer/Builder/CMakeLists.txt
    flang/lib/Optimizer/Builder/FIRBuilder.cpp
    flang/lib/Optimizer/Dialect/FIROps.cpp
    flang/lib/Optimizer/Dialect/FIRType.cpp
    flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
new file mode 100644
index 0000000000000..ef84dfd30f1a6
--- /dev/null
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -0,0 +1,472 @@
+//===-- BoxValue.h -- internal box values -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_BUILDER_BOXVALUE_H
+#define FORTRAN_OPTIMIZER_BUILDER_BOXVALUE_H
+
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Support/FatalError.h"
+#include "flang/Optimizer/Support/Matcher.h"
+#include "mlir/IR/OperationSupport.h"
+#include "mlir/IR/Value.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <utility>
+
+namespace fir {
+class CharBoxValue;
+class ArrayBoxValue;
+class CharArrayBoxValue;
+class ProcBoxValue;
+class MutableBoxValue;
+class BoxValue;
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const CharBoxValue &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const ArrayBoxValue &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const CharArrayBoxValue &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const ProcBoxValue &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const MutableBoxValue &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const BoxValue &);
+
+//===----------------------------------------------------------------------===//
+//
+// Boxed values
+//
+// Define a set of containers used internally by the lowering bridge to keep
+// track of extended values associated with a Fortran subexpression. These
+// associations are maintained during the construction of FIR.
+//
+//===----------------------------------------------------------------------===//
+
+/// Most expressions of intrinsic type can be passed unboxed. Their properties
+/// are known statically.
+using UnboxedValue = mlir::Value;
+
+/// Abstract base class.
+class AbstractBox {
+public:
+  AbstractBox() = delete;
+  AbstractBox(mlir::Value addr) : addr{addr} {}
+
+  /// FIXME: this comment is not true anymore since genLoad
+  /// is loading constant length characters. What is the impact  /// ?
+  /// An abstract box always contains a memory reference to a value.
+  mlir::Value getAddr() const { return addr; }
+
+protected:
+  mlir::Value addr;
+};
+
+/// Expressions of CHARACTER type have an associated, possibly dynamic LEN
+/// value.
+class CharBoxValue : public AbstractBox {
+public:
+  CharBoxValue(mlir::Value addr, mlir::Value len)
+      : AbstractBox{addr}, len{len} {
+    if (addr && addr.getType().template isa<fir::BoxCharType>())
+      fir::emitFatalError(addr.getLoc(),
+                          "BoxChar should not be in CharBoxValue");
+  }
+
+  CharBoxValue clone(mlir::Value newBase) const { return {newBase, len}; }
+
+  /// Convenience alias to get the memory reference to the buffer.
+  mlir::Value getBuffer() const { return getAddr(); }
+
+  mlir::Value getLen() const { return len; }
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                                       const CharBoxValue &);
+  LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this; }
+
+protected:
+  mlir::Value len;
+};
+
+/// Abstract base class.
+/// Expressions of type array have at minimum a shape. These expressions may
+/// have lbound attributes (dynamic values) that affect the interpretation of
+/// indexing expressions.
+class AbstractArrayBox {
+public:
+  AbstractArrayBox() = default;
+  AbstractArrayBox(llvm::ArrayRef<mlir::Value> extents,
+                   llvm::ArrayRef<mlir::Value> lbounds)
+      : extents{extents.begin(), extents.end()}, lbounds{lbounds.begin(),
+                                                         lbounds.end()} {}
+
+  // Every array has extents that describe its shape.
+  const llvm::SmallVectorImpl<mlir::Value> &getExtents() const {
+    return extents;
+  }
+
+  // An array expression may have user-defined lower bound values.
+  // If this vector is empty, the default in all dimensions is `1`.
+  const llvm::SmallVectorImpl<mlir::Value> &getLBounds() const {
+    return lbounds;
+  }
+
+  bool lboundsAllOne() const { return lbounds.empty(); }
+  std::size_t rank() const { return extents.size(); }
+
+protected:
+  llvm::SmallVector<mlir::Value, 4> extents;
+  llvm::SmallVector<mlir::Value, 4> lbounds;
+};
+
+/// Expressions with rank > 0 have extents. They may also have lbounds that are
+/// not 1.
+class ArrayBoxValue : public AbstractBox, public AbstractArrayBox {
+public:
+  ArrayBoxValue(mlir::Value addr, llvm::ArrayRef<mlir::Value> extents,
+                llvm::ArrayRef<mlir::Value> lbounds = {})
+      : AbstractBox{addr}, AbstractArrayBox{extents, lbounds} {}
+
+  ArrayBoxValue clone(mlir::Value newBase) const {
+    return {newBase, extents, lbounds};
+  }
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                                       const ArrayBoxValue &);
+  LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this; }
+};
+
+/// Expressions of type CHARACTER and with rank > 0.
+class CharArrayBoxValue : public CharBoxValue, public AbstractArrayBox {
+public:
+  CharArrayBoxValue(mlir::Value addr, mlir::Value len,
+                    llvm::ArrayRef<mlir::Value> extents,
+                    llvm::ArrayRef<mlir::Value> lbounds = {})
+      : CharBoxValue{addr, len}, AbstractArrayBox{extents, lbounds} {}
+
+  CharArrayBoxValue clone(mlir::Value newBase) const {
+    return {newBase, len, extents, lbounds};
+  }
+
+  CharBoxValue cloneElement(mlir::Value newBase) const {
+    return {newBase, len};
+  }
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                                       const CharArrayBoxValue &);
+  LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this; }
+};
+
+/// Expressions that are procedure POINTERs may need a set of references to
+/// variables in the host scope.
+class ProcBoxValue : public AbstractBox {
+public:
+  ProcBoxValue(mlir::Value addr, mlir::Value context)
+      : AbstractBox{addr}, hostContext{context} {}
+
+  ProcBoxValue clone(mlir::Value newBase) const {
+    return {newBase, hostContext};
+  }
+
+  mlir::Value getHostContext() const { return hostContext; }
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                                       const ProcBoxValue &);
+  LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this; }
+
+protected:
+  mlir::Value hostContext;
+};
+
+/// Base class for values associated to a fir.box or fir.ref<fir.box>.
+class AbstractIrBox : public AbstractBox, public AbstractArrayBox {
+public:
+  AbstractIrBox(mlir::Value addr) : AbstractBox{addr} {}
+  AbstractIrBox(mlir::Value addr, llvm::ArrayRef<mlir::Value> lbounds,
+                llvm::ArrayRef<mlir::Value> extents)
+      : AbstractBox{addr}, AbstractArrayBox(extents, lbounds) {}
+  /// Get the fir.box<type> part of the address type.
+  fir::BoxType getBoxTy() const {
+    auto type = getAddr().getType();
+    if (auto pointedTy = fir::dyn_cast_ptrEleTy(type))
+      type = pointedTy;
+    return type.cast<fir::BoxType>();
+  }
+  /// Return the part of the address type after memory and box types. That is
+  /// the element type, maybe wrapped in a fir.array type.
+  mlir::Type getBaseTy() const {
+    return fir::dyn_cast_ptrOrBoxEleTy(getBoxTy());
+  }
+
+  /// Return the memory type of the data address inside the box:
+  /// - for fir.box<fir.ptr<T>>, return fir.ptr<T>
+  /// - for fir.box<fir.heap<T>>, return fir.heap<T>
+  /// - for fir.box<T>, return fir.ref<T>
+  mlir::Type getMemTy() const {
+    auto ty = getBoxTy().getEleTy();
+    if (fir::isa_ref_type(ty))
+      return ty;
+    return fir::ReferenceType::get(ty);
+  }
+
+  /// Get the scalar type related to the described entity
+  mlir::Type getEleTy() const {
+    auto type = getBaseTy();
+    if (auto seqTy = type.dyn_cast<fir::SequenceType>())
+      return seqTy.getEleTy();
+    return type;
+  }
+
+  /// Is the entity an array or an assumed rank ?
+  bool hasRank() const { return getBaseTy().isa<fir::SequenceType>(); }
+  /// Is this an assumed rank ?
+  bool hasAssumedRank() const {
+    auto seqTy = getBaseTy().dyn_cast<fir::SequenceType>();
+    return seqTy && seqTy.hasUnknownShape();
+  }
+  /// Returns the rank of the entity. Beware that zero will be returned for
+  /// both scalars and assumed rank.
+  unsigned rank() const {
+    if (auto seqTy = getBaseTy().dyn_cast<fir::SequenceType>())
+      return seqTy.getDimension();
+    return 0;
+  }
+  /// Is this a character entity ?
+  bool isCharacter() const { return fir::isa_char(getEleTy()); };
+  /// Is this a derived type entity ?
+  bool isDerived() const { return getEleTy().isa<fir::RecordType>(); };
+
+  bool isDerivedWithLengthParameters() const {
+    auto record = getEleTy().dyn_cast<fir::RecordType>();
+    return record && record.getNumLenParams() != 0;
+  };
+  /// Is this a CLASS(*)/TYPE(*) ?
+  bool isUnlimitedPolymorphic() const {
+    return getEleTy().isa<mlir::NoneType>();
+  }
+};
+
+/// An entity described by a fir.box value that cannot be read into
+/// another ExtendedValue category, either because the fir.box may be an
+/// absent optional and we need to wait until the user is referencing it
+/// to read it, or because it contains important information that cannot
+/// be exposed in FIR (e.g. non contiguous byte stride).
+/// It may also store explicit bounds or length parameters that were specified
+/// for the entity.
+class BoxValue : public AbstractIrBox {
+public:
+  BoxValue(mlir::Value addr) : AbstractIrBox{addr} { assert(verify()); }
+  BoxValue(mlir::Value addr, llvm::ArrayRef<mlir::Value> lbounds,
+           llvm::ArrayRef<mlir::Value> explicitParams,
+           llvm::ArrayRef<mlir::Value> explicitExtents = {})
+      : AbstractIrBox{addr, lbounds, explicitExtents},
+        explicitParams{explicitParams.begin(), explicitParams.end()} {
+    assert(verify());
+  }
+  // TODO: check contiguous attribute of addr
+  bool isContiguous() const { return false; }
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const BoxValue &);
+  LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this; }
+
+  llvm::ArrayRef<mlir::Value> getLBounds() const { return lbounds; }
+
+  // The extents member is not guaranteed to be field for arrays. It is only
+  // guaranteed to be field for explicit shape arrays. In general,
+  // explicit-shape will not come as descriptors, so this field will be empty in
+  // most cases. The exception are derived types with length parameters and
+  // polymorphic dummy argument arrays. It may be possible for the explicit
+  // extents to conflict with the shape information that is in the box according
+  // to 15.5.2.11 sequence association rules.
+  llvm::ArrayRef<mlir::Value> getExplicitExtents() const { return extents; }
+
+  llvm::ArrayRef<mlir::Value> getExplicitParameters() const {
+    return explicitParams;
+  }
+
+protected:
+  // Verify constructor invariants.
+  bool verify() const;
+
+  // Only field when the BoxValue has explicit length parameters.
+  // Otherwise, the length parameters are in the fir.box.
+  llvm::SmallVector<mlir::Value, 2> explicitParams;
+};
+
+/// Set of variables (addresses) holding the allocatable properties. These may
+/// be empty in case it is not deemed safe to duplicate the descriptor
+/// information locally (For instance, a volatile allocatable will always be
+/// lowered to a descriptor to preserve the integrity of the entity and its
+/// associated properties. As such, all references to the entity and its
+/// property will go through the descriptor explicitly.).
+class MutableProperties {
+public:
+  bool isEmpty() const { return !addr; }
+  mlir::Value addr;
+  llvm::SmallVector<mlir::Value, 2> extents;
+  llvm::SmallVector<mlir::Value, 2> lbounds;
+  /// Only keep track of the deferred length parameters through variables, since
+  /// they are the only ones that can change as per the deferred type parameters
+  /// definition in F2018 standard section 3.147.12.2.
+  /// Non-deferred values are returned by
+  /// MutableBoxValue.nonDeferredLenParams().
+  llvm::SmallVector<mlir::Value, 2> deferredParams;
+};
+
+/// MutableBoxValue is used for entities that are represented by the address of
+/// a box. This is intended to be used for entities whose base address, shape
+/// and type are not constant in the entity lifetime (e.g Allocatables and
+/// Pointers).
+class MutableBoxValue : public AbstractIrBox {
+public:
+  /// Create MutableBoxValue given the address \p addr of the box and the non
+  /// deferred length parameters \p lenParameters. The non deferred length
+  /// parameters must always be provided, even if they are constant and already
+  /// reflected in the address type.
+  MutableBoxValue(mlir::Value addr, mlir::ValueRange lenParameters,
+                  MutableProperties mutableProperties)
+      : AbstractIrBox(addr), lenParams{lenParameters.begin(),
+                                       lenParameters.end()},
+        mutableProperties{mutableProperties} {
+    // Currently only accepts fir.(ref/ptr/heap)<fir.box<type>> mlir::Value for
+    // the address. This may change if we accept
+    // fir.(ref/ptr/heap)<fir.heap<type>> for scalar without length parameters.
+    assert(verify() &&
+           "MutableBoxValue requires mem ref to fir.box<fir.[heap|ptr]<type>>");
+  }
+  /// Is this a Fortran pointer ?
+  bool isPointer() const {
+    return getBoxTy().getEleTy().isa<fir::PointerType>();
+  }
+  /// Is this an allocatable ?
+  bool isAllocatable() const {
+    return getBoxTy().getEleTy().isa<fir::HeapType>();
+  }
+  /// Does this entity have any non deferred length parameters ?
+  bool hasNonDeferredLenParams() const { return !lenParams.empty(); }
+  /// Return the non deferred length parameters.
+  llvm::ArrayRef<mlir::Value> nonDeferredLenParams() const { return lenParams; }
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                                       const MutableBoxValue &);
+  LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this; }
+
+  /// Set of variables is used instead of a descriptor to hold the entity
+  /// properties instead of a fir.ref<fir.box<>>.
+  bool isDescribedByVariables() const { return !mutableProperties.isEmpty(); }
+
+  const MutableProperties &getMutableProperties() const {
+    return mutableProperties;
+  }
+
+protected:
+  /// Validate the address type form in the constructor.
+  bool verify() const;
+  /// Hold the non-deferred length parameter values  (both for characters and
+  /// derived). Non-deferred length parameters cannot change dynamically, as
+  /// opposed to deferred type parameters (3.147.12.2).
+  llvm::SmallVector<mlir::Value, 2> lenParams;
+  /// Set of variables holding the extents, lower bounds and
+  /// base address when it is deemed safe to work with these variables rather
+  /// than directly with a descriptor.
+  MutableProperties mutableProperties;
+};
+
+class ExtendedValue;
+
+/// Get the base value of an extended value. Every type of extended value has a
+/// base value or is null.
+mlir::Value getBase(const ExtendedValue &exv);
+
+/// Get the LEN property value of an extended value. CHARACTER values have a LEN
+/// property.
+mlir::Value getLen(const ExtendedValue &exv);
+
+/// Pretty-print an extended value.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const ExtendedValue &);
+
+/// Return a clone of the extended value `exv` with the base value `base`
+/// substituted.
+ExtendedValue substBase(const ExtendedValue &exv, mlir::Value base);
+
+/// Is the extended value `exv` an array?
+bool isArray(const ExtendedValue &exv);
+
+/// Get the type parameters for `exv`.
+llvm::SmallVector<mlir::Value> getTypeParams(const ExtendedValue &exv);
+
+/// An extended value is a box of values pertaining to a discrete entity. It is
+/// used in lowering to track all the runtime values related to an entity. For
+/// example, an entity may have an address in memory that contains its value(s)
+/// as well as various attribute values that describe the shape and starting
+/// indices if it is an array entity.
+class ExtendedValue : public details::matcher<ExtendedValue> {
+public:
+  using VT =
+      std::variant<UnboxedValue, CharBoxValue, ArrayBoxValue, CharArrayBoxValue,
+                   ProcBoxValue, BoxValue, MutableBoxValue>;
+
+  ExtendedValue() : box{UnboxedValue{}} {}
+  template <typename A, typename = std::enable_if_t<
+                            !std::is_same_v<std::decay_t<A>, ExtendedValue>>>
+  constexpr ExtendedValue(A &&a) : box{std::forward<A>(a)} {
+    if (const auto *b = getUnboxed()) {
+      if (*b) {
+        auto type = b->getType();
+        if (type.template isa<fir::BoxCharType>())
+          fir::emitFatalError(b->getLoc(), "BoxChar should be unboxed");
+        if (auto refType = type.template dyn_cast<fir::ReferenceType>())
+          type = refType.getEleTy();
+        if (auto seqType = type.template dyn_cast<fir::SequenceType>())
+          type = seqType.getEleTy();
+        if (fir::isa_char(type))
+          fir::emitFatalError(b->getLoc(),
+                              "character buffer should be in CharBoxValue");
+      }
+    }
+  }
+
+  template <typename A>
+  constexpr const A *getBoxOf() const {
+    return std::get_if<A>(&box);
+  }
+
+  constexpr const CharBoxValue *getCharBox() const {
+    return getBoxOf<CharBoxValue>();
+  }
+
+  constexpr const UnboxedValue *getUnboxed() const {
+    return getBoxOf<UnboxedValue>();
+  }
+
+  unsigned rank() const {
+    return match([](const fir::UnboxedValue &box) -> unsigned { return 0; },
+                 [](const fir::CharBoxValue &box) -> unsigned { return 0; },
+                 [](const fir::ProcBoxValue &box) -> unsigned { return 0; },
+                 [](const auto &box) -> unsigned { return box.rank(); });
+  }
+
+  /// LLVM style debugging of extended values
+  LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this << '\n'; }
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                                       const ExtendedValue &);
+
+  const VT &matchee() const { return box; }
+
+private:
+  VT box;
+};
+
+/// Is the extended value `exv` unboxed and non-null?
+inline bool isUnboxedValue(const ExtendedValue &exv) {
+  return exv.match(
+      [](const fir::UnboxedValue &box) { return box ? true : false; },
+      [](const auto &) { return false; });
+}
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_BUILDER_BOXVALUE_H

diff  --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 5984055b9ea51..4710e025419c5 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -23,6 +23,7 @@
 #include "mlir/IR/BuiltinOps.h"
 
 namespace fir {
+class ExtendedValue;
 
 //===----------------------------------------------------------------------===//
 // FirOpBuilder
@@ -55,6 +56,9 @@ class FirOpBuilder : public mlir::OpBuilder {
   /// Create a sequence of `eleTy` with `rank` dimensions of unknown size.
   mlir::Type getVarLenSeqTy(mlir::Type eleTy, unsigned rank = 1);
 
+  /// Get character length type
+  mlir::Type getCharacterLengthType() { return getIndexType(); }
+
   /// Get the integer type whose bit width corresponds to the width of pointer
   /// types, or is bigger.
   mlir::Type getIntPtrType() {
@@ -266,9 +270,14 @@ class FirOpBuilder : public mlir::OpBuilder {
 
 namespace fir::factory {
 
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 // String literal helper helpers
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+/// Create a !fir.char<1> string literal global and returns a
+/// fir::CharBoxValue with its address and length.
+fir::ExtendedValue createStringLiteral(fir::FirOpBuilder &, mlir::Location,
+                                       llvm::StringRef string);
 
 /// Unique a compiler generated identifier. A short prefix should be provided
 /// to hint at the origin of the identifier.
@@ -278,6 +287,9 @@ std::string uniqueCGIdent(llvm::StringRef prefix, llvm::StringRef name);
 // Location helpers
 //===----------------------------------------------------------------------===//
 
+/// Generate a string literal containing the file name and return its address
+mlir::Value locationToFilename(fir::FirOpBuilder &, mlir::Location);
+
 /// Generate a constant of the given type with the location line number
 mlir::Value locationToLineNo(fir::FirOpBuilder &, mlir::Location, mlir::Type);
 

diff  --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index 82c4a43c06843..e0f0e29520390 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -138,6 +138,22 @@ inline bool isa_char_string(mlir::Type t) {
 /// of unknown rank or type.
 bool isa_unknown_size_box(mlir::Type t);
 
+/// Returns true iff `t` is a fir.char type and has an unknown length.
+inline bool characterWithDynamicLen(mlir::Type t) {
+  if (auto charTy = t.dyn_cast<fir::CharacterType>())
+    return charTy.hasDynamicLen();
+  return false;
+}
+
+/// Returns true iff `seqTy` has either an unknown shape or a non-constant shape
+/// (where rank > 0).
+inline bool sequenceWithNonConstantShape(fir::SequenceType seqTy) {
+  return seqTy.hasUnknownShape() || !seqTy.hasConstantShape();
+}
+
+/// Returns true iff the type `t` does not have a constant size.
+bool hasDynamicSize(mlir::Type t);
+
 /// If `t` is a SequenceType return its element type, otherwise return `t`.
 inline mlir::Type unwrapSequenceType(mlir::Type t) {
   if (auto seqTy = t.dyn_cast<fir::SequenceType>())

diff  --git a/flang/include/flang/Optimizer/Support/Matcher.h b/flang/include/flang/Optimizer/Support/Matcher.h
new file mode 100644
index 0000000000000..da1d7c21f42c4
--- /dev/null
+++ b/flang/include/flang/Optimizer/Support/Matcher.h
@@ -0,0 +1,35 @@
+//===-- Optimizer/Support/Matcher.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_SUPPORT_MATCHER_H
+#define FORTRAN_OPTIMIZER_SUPPORT_MATCHER_H
+
+#include <variant>
+
+// Boilerplate CRTP class for a simplified type-casing syntactic sugar. This
+// lets one write pattern matchers using a more compact syntax.
+namespace fir::details {
+// clang-format off
+template<class... Ts> struct matches : Ts... { using Ts::operator()...; };
+template<class... Ts> matches(Ts...) -> matches<Ts...>;
+template<typename N> struct matcher {
+  template<typename... Ts> auto match(Ts... ts) {
+    return std::visit(matches{ts...}, static_cast<N*>(this)->matchee());
+  }
+  template<typename... Ts> auto match(Ts... ts) const {
+    return std::visit(matches{ts...}, static_cast<N const*>(this)->matchee());
+  }
+};
+// clang-format on
+} // namespace fir::details
+
+#endif // FORTRAN_OPTIMIZER_SUPPORT_MATCHER_H

diff  --git a/flang/lib/Optimizer/Builder/BoxValue.cpp b/flang/lib/Optimizer/Builder/BoxValue.cpp
new file mode 100644
index 0000000000000..0b713ec4a9408
--- /dev/null
+++ b/flang/lib/Optimizer/Builder/BoxValue.cpp
@@ -0,0 +1,228 @@
+//===-- BoxValue.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pretty printers for box values, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "flang-box-value"
+
+mlir::Value fir::getBase(const fir::ExtendedValue &exv) {
+  return exv.match([](const fir::UnboxedValue &x) { return x; },
+                   [](const auto &x) { return x.getAddr(); });
+}
+
+mlir::Value fir::getLen(const fir::ExtendedValue &exv) {
+  return exv.match(
+      [](const fir::CharBoxValue &x) { return x.getLen(); },
+      [](const fir::CharArrayBoxValue &x) { return x.getLen(); },
+      [](const fir::BoxValue &) -> mlir::Value {
+        llvm::report_fatal_error("Need to read len from BoxValue Exv");
+      },
+      [](const fir::MutableBoxValue &) -> mlir::Value {
+        llvm::report_fatal_error("Need to read len from MutableBoxValue Exv");
+      },
+      [](const auto &) { return mlir::Value{}; });
+}
+
+fir::ExtendedValue fir::substBase(const fir::ExtendedValue &exv,
+                                  mlir::Value base) {
+  return exv.match(
+      [=](const fir::UnboxedValue &x) { return fir::ExtendedValue(base); },
+      [=](const fir::BoxValue &) -> fir::ExtendedValue {
+        llvm::report_fatal_error("TODO: substbase of BoxValue");
+      },
+      [=](const fir::MutableBoxValue &) -> fir::ExtendedValue {
+        llvm::report_fatal_error("TODO: substbase of MutableBoxValue");
+      },
+      [=](const auto &x) { return fir::ExtendedValue(x.clone(base)); });
+}
+
+llvm::SmallVector<mlir::Value> fir::getTypeParams(const ExtendedValue &exv) {
+  using RT = llvm::SmallVector<mlir::Value>;
+  auto baseTy = fir::getBase(exv).getType();
+  if (auto t = fir::dyn_cast_ptrEleTy(baseTy))
+    baseTy = t;
+  baseTy = fir::unwrapSequenceType(baseTy);
+  if (!fir::hasDynamicSize(baseTy))
+    return {}; // type has constant size, no type parameters needed
+  [[maybe_unused]] auto loc = fir::getBase(exv).getLoc();
+  return exv.match(
+      [](const fir::CharBoxValue &x) -> RT { return {x.getLen()}; },
+      [](const fir::CharArrayBoxValue &x) -> RT { return {x.getLen()}; },
+      [&](const fir::BoxValue &) -> RT {
+        LLVM_DEBUG(mlir::emitWarning(
+            loc, "TODO: box value is missing type parameters"));
+        return {};
+      },
+      [&](const fir::MutableBoxValue &) -> RT {
+        // In this case, the type params may be bound to the variable in an
+        // ALLOCATE statement as part of a type-spec.
+        LLVM_DEBUG(mlir::emitWarning(
+            loc, "TODO: mutable box value is missing type parameters"));
+        return {};
+      },
+      [](const auto &) -> RT { return {}; });
+}
+
+bool fir::isArray(const fir::ExtendedValue &exv) {
+  return exv.match(
+      [](const fir::ArrayBoxValue &) { return true; },
+      [](const fir::CharArrayBoxValue &) { return true; },
+      [](const fir::BoxValue &box) { return box.hasRank(); },
+      [](const fir::MutableBoxValue &box) { return box.hasRank(); },
+      [](auto) { return false; });
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::CharBoxValue &box) {
+  return os << "boxchar { addr: " << box.getAddr() << ", len: " << box.getLen()
+            << " }";
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::ArrayBoxValue &box) {
+  os << "boxarray { addr: " << box.getAddr();
+  if (box.getLBounds().size()) {
+    os << ", lbounds: [";
+    llvm::interleaveComma(box.getLBounds(), os);
+    os << "]";
+  } else {
+    os << ", lbounds: all-ones";
+  }
+  os << ", shape: [";
+  llvm::interleaveComma(box.getExtents(), os);
+  return os << "]}";
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::CharArrayBoxValue &box) {
+  os << "boxchararray { addr: " << box.getAddr() << ", len : " << box.getLen();
+  if (box.getLBounds().size()) {
+    os << ", lbounds: [";
+    llvm::interleaveComma(box.getLBounds(), os);
+    os << "]";
+  } else {
+    os << " lbounds: all-ones";
+  }
+  os << ", shape: [";
+  llvm::interleaveComma(box.getExtents(), os);
+  return os << "]}";
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::ProcBoxValue &box) {
+  return os << "boxproc: { procedure: " << box.getAddr()
+            << ", context: " << box.hostContext << "}";
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::BoxValue &box) {
+  os << "box: { value: " << box.getAddr();
+  if (box.lbounds.size()) {
+    os << ", lbounds: [";
+    llvm::interleaveComma(box.lbounds, os);
+    os << "]";
+  }
+  if (!box.explicitParams.empty()) {
+    os << ", explicit type params: [";
+    llvm::interleaveComma(box.explicitParams, os);
+    os << "]";
+  }
+  if (!box.extents.empty()) {
+    os << ", explicit extents: [";
+    llvm::interleaveComma(box.extents, os);
+    os << "]";
+  }
+  return os << "}";
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::MutableBoxValue &box) {
+  os << "mutablebox: { addr: " << box.getAddr();
+  if (!box.lenParams.empty()) {
+    os << ", non deferred type params: [";
+    llvm::interleaveComma(box.lenParams, os);
+    os << "]";
+  }
+  const auto &properties = box.mutableProperties;
+  if (!properties.isEmpty()) {
+    os << ", mutableProperties: { addr: " << properties.addr;
+    if (!properties.lbounds.empty()) {
+      os << ", lbounds: [";
+      llvm::interleaveComma(properties.lbounds, os);
+      os << "]";
+    }
+    if (!properties.extents.empty()) {
+      os << ", shape: [";
+      llvm::interleaveComma(properties.extents, os);
+      os << "]";
+    }
+    if (!properties.deferredParams.empty()) {
+      os << ", deferred type params: [";
+      llvm::interleaveComma(properties.deferredParams, os);
+      os << "]";
+    }
+    os << "}";
+  }
+  return os << "}";
+}
+
+llvm::raw_ostream &fir::operator<<(llvm::raw_ostream &os,
+                                   const fir::ExtendedValue &exv) {
+  exv.match([&](const auto &value) { os << value; });
+  return os;
+}
+
+/// Debug verifier for MutableBox ctor. There is no guarantee that this will
+/// always be called, so it should not have any functional side effects,
+/// the const is here to enforce that.
+bool fir::MutableBoxValue::verify() const {
+  auto type = fir::dyn_cast_ptrEleTy(getAddr().getType());
+  if (!type)
+    return false;
+  auto box = type.dyn_cast<fir::BoxType>();
+  if (!box)
+    return false;
+  auto eleTy = box.getEleTy();
+  if (!eleTy.isa<fir::PointerType>() && !eleTy.isa<fir::HeapType>())
+    return false;
+
+  auto nParams = lenParams.size();
+  if (isCharacter()) {
+    if (nParams > 1)
+      return false;
+  } else if (!isDerived()) {
+    if (nParams != 0)
+      return false;
+  }
+  return true;
+}
+
+/// Debug verifier for BoxValue ctor. There is no guarantee this will
+/// always be called.
+bool fir::BoxValue::verify() const {
+  if (!addr.getType().isa<fir::BoxType>())
+    return false;
+  if (!lbounds.empty() && lbounds.size() != rank())
+    return false;
+  // Explicit extents are here to cover cases where an explicit-shape dummy
+  // argument comes as a fir.box. This can only happen with derived types and
+  // unlimited polymorphic.
+  if (!extents.empty() && !(isDerived() || isUnlimitedPolymorphic()))
+    return false;
+  if (!extents.empty() && extents.size() != rank())
+    return false;
+  if (isCharacter() && explicitParams.size() > 1)
+    return false;
+  return true;
+}

diff  --git a/flang/lib/Optimizer/Builder/CMakeLists.txt b/flang/lib/Optimizer/Builder/CMakeLists.txt
index c000c984b198b..f4aafeb94a8e4 100644
--- a/flang/lib/Optimizer/Builder/CMakeLists.txt
+++ b/flang/lib/Optimizer/Builder/CMakeLists.txt
@@ -1,5 +1,6 @@
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 add_flang_library(FIRBuilder
+  BoxValue.cpp
   DoLoopHelper.cpp
   FIRBuilder.cpp
 

diff  --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 22e77d405f76e..ab060af114771 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
@@ -207,6 +208,16 @@ std::string fir::factory::uniqueCGIdent(llvm::StringRef prefix,
       nm.append(".").append(llvm::toHex(name)));
 }
 
+mlir::Value fir::factory::locationToFilename(fir::FirOpBuilder &builder,
+                                             mlir::Location loc) {
+  if (auto flc = loc.dyn_cast<mlir::FileLineColLoc>()) {
+    // must be encoded as asciiz, C string
+    auto fn = flc.getFilename().str() + '\0';
+    return fir::getBase(createStringLiteral(builder, loc, fn));
+  }
+  return builder.createNullConstant(loc);
+}
+
 mlir::Value fir::factory::locationToLineNo(fir::FirOpBuilder &builder,
                                            mlir::Location loc,
                                            mlir::Type type) {
@@ -214,3 +225,24 @@ mlir::Value fir::factory::locationToLineNo(fir::FirOpBuilder &builder,
     return builder.createIntegerConstant(loc, type, flc.getLine());
   return builder.createIntegerConstant(loc, type, 0);
 }
+
+fir::ExtendedValue fir::factory::createStringLiteral(fir::FirOpBuilder &builder,
+                                                     mlir::Location loc,
+                                                     llvm::StringRef str) {
+  std::string globalName = fir::factory::uniqueCGIdent("cl", str);
+  auto type = fir::CharacterType::get(builder.getContext(), 1, str.size());
+  auto global = builder.getNamedGlobal(globalName);
+  if (!global)
+    global = builder.createGlobalConstant(
+        loc, type, globalName,
+        [&](fir::FirOpBuilder &builder) {
+          auto stringLitOp = builder.createStringLitOp(loc, str);
+          builder.create<fir::HasValueOp>(loc, stringLitOp);
+        },
+        builder.createLinkOnceLinkage());
+  auto addr = builder.create<fir::AddrOfOp>(loc, global.resultType(),
+                                            global.getSymbol());
+  auto len = builder.createIntegerConstant(
+      loc, builder.getCharacterLengthType(), str.size());
+  return fir::CharBoxValue{addr, len};
+}

diff  --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 287cc6ad3321f..64174b8892dfc 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -1311,6 +1311,10 @@ static mlir::ArrayAttr collectAsAttributes(mlir::MLIRContext *ctxt,
 // GlobalLenOp
 //===----------------------------------------------------------------------===//
 
+mlir::Type fir::GlobalOp::resultType() {
+  return wrapAllocaResultType(getType());
+}
+
 static mlir::ParseResult parseGlobalLenOp(mlir::OpAsmParser &parser,
                                           mlir::OperationState &result) {
   llvm::StringRef fieldName;

diff  --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
index fb3fba663bab9..e020048ed7d94 100644
--- a/flang/lib/Optimizer/Dialect/FIRType.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -218,6 +218,34 @@ mlir::Type dyn_cast_ptrOrBoxEleTy(mlir::Type t) {
       .Default([](mlir::Type) { return mlir::Type{}; });
 }
 
+static bool hasDynamicSize(fir::RecordType recTy) {
+  for (auto field : recTy.getTypeList()) {
+    if (auto arr = field.second.dyn_cast<fir::SequenceType>()) {
+      if (sequenceWithNonConstantShape(arr))
+        return true;
+    } else if (characterWithDynamicLen(field.second)) {
+      return true;
+    } else if (auto rec = field.second.dyn_cast<fir::RecordType>()) {
+      if (hasDynamicSize(rec))
+        return true;
+    }
+  }
+  return false;
+}
+
+bool hasDynamicSize(mlir::Type t) {
+  if (auto arr = t.dyn_cast<fir::SequenceType>()) {
+    if (sequenceWithNonConstantShape(arr))
+      return true;
+    t = arr.getEleTy();
+  }
+  if (characterWithDynamicLen(t))
+    return true;
+  if (auto rec = t.dyn_cast<fir::RecordType>())
+    return hasDynamicSize(rec);
+  return false;
+}
+
 } // namespace fir
 
 namespace {

diff  --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
index 06b45756b454b..2942477a5c2c6 100644
--- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
+++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
@@ -8,6 +8,7 @@
 
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "gtest/gtest.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Support/InitFIR.h"
 #include "flang/Optimizer/Support/KindMapping.h"
 
@@ -243,3 +244,77 @@ TEST_F(FIRBuilderTest, locationToLineNo) {
       builder, builder.getUnknownLoc(), builder.getI64Type());
   checkIntegerConstant(line, builder.getI64Type(), 0);
 }
+
+TEST_F(FIRBuilderTest, hasDynamicSize) {
+  auto builder = getBuilder();
+  auto type = fir::CharacterType::get(builder.getContext(), 1, 16);
+  EXPECT_FALSE(fir::hasDynamicSize(type));
+  EXPECT_TRUE(fir::SequenceType::getUnknownExtent());
+  auto seqTy = builder.getVarLenSeqTy(builder.getI64Type(), 10);
+  EXPECT_TRUE(fir::hasDynamicSize(seqTy));
+  EXPECT_FALSE(fir::hasDynamicSize(builder.getI64Type()));
+}
+
+TEST_F(FIRBuilderTest, locationToFilename) {
+  auto builder = getBuilder();
+  auto loc =
+      mlir::FileLineColLoc::get(builder.getIdentifier("file1.f90"), 10, 5);
+  mlir::Value locToFile = fir::factory::locationToFilename(builder, loc);
+  auto addrOp = dyn_cast<fir::AddrOfOp>(locToFile.getDefiningOp());
+  auto symbol = addrOp.symbol().getRootReference().getValue();
+  auto global = builder.getNamedGlobal(symbol);
+  auto stringLitOps = global.getRegion().front().getOps<fir::StringLitOp>();
+  EXPECT_TRUE(llvm::hasSingleElement(stringLitOps));
+  for (auto stringLit : stringLitOps) {
+    EXPECT_EQ(10, stringLit.getSize().cast<mlir::IntegerAttr>().getValue());
+    EXPECT_TRUE(stringLit.getValue().isa<StringAttr>());
+    EXPECT_EQ(0,
+        strcmp("file1.f90\0",
+            stringLit.getValue()
+                .dyn_cast<StringAttr>()
+                .getValue()
+                .str()
+                .c_str()));
+  }
+}
+
+TEST_F(FIRBuilderTest, createStringLitOp) {
+  auto builder = getBuilder();
+  llvm::StringRef data("mystringlitdata");
+  auto loc = builder.getUnknownLoc();
+  auto op = builder.createStringLitOp(loc, data);
+  EXPECT_EQ(15, op.getSize().cast<mlir::IntegerAttr>().getValue());
+  EXPECT_TRUE(op.getValue().isa<StringAttr>());
+  EXPECT_EQ(data, op.getValue().dyn_cast<StringAttr>().getValue());
+}
+
+TEST_F(FIRBuilderTest, createStringLiteral) {
+  auto builder = getBuilder();
+  auto loc = builder.getUnknownLoc();
+  llvm::StringRef strValue("onestringliteral");
+  auto strLit = fir::factory::createStringLiteral(builder, loc, strValue);
+  EXPECT_EQ(0u, strLit.rank());
+  EXPECT_TRUE(strLit.getCharBox() != nullptr);
+  auto *charBox = strLit.getCharBox();
+  EXPECT_FALSE(fir::isArray(*charBox));
+  checkIntegerConstant(charBox->getLen(), builder.getCharacterLengthType(), 16);
+  auto generalGetLen = fir::getLen(strLit);
+  checkIntegerConstant(generalGetLen, builder.getCharacterLengthType(), 16);
+  auto addr = charBox->getBuffer();
+  EXPECT_TRUE(mlir::isa<fir::AddrOfOp>(addr.getDefiningOp()));
+  auto addrOp = dyn_cast<fir::AddrOfOp>(addr.getDefiningOp());
+  auto symbol = addrOp.symbol().getRootReference().getValue();
+  auto global = builder.getNamedGlobal(symbol);
+  EXPECT_EQ(
+      builder.createLinkOnceLinkage().getValue(), global.linkName().getValue());
+  EXPECT_EQ(fir::CharacterType::get(builder.getContext(), 1, strValue.size()),
+      global.type());
+
+  auto stringLitOps = global.getRegion().front().getOps<fir::StringLitOp>();
+  EXPECT_TRUE(llvm::hasSingleElement(stringLitOps));
+  for (auto stringLit : stringLitOps) {
+    EXPECT_EQ(16, stringLit.getSize().cast<mlir::IntegerAttr>().getValue());
+    EXPECT_TRUE(stringLit.getValue().isa<StringAttr>());
+    EXPECT_EQ(strValue, stringLit.getValue().dyn_cast<StringAttr>().getValue());
+  }
+}


        


More information about the flang-commits mailing list