[flang-commits] [flang] e4ce922 - [fir] Add Character helper

Valentin Clement via flang-commits flang-commits at lists.llvm.org
Thu Oct 21 00:48:47 PDT 2021


Author: Valentin Clement
Date: 2021-10-21T09:48:37+02:00
New Revision: e4ce92245c96cea9492767d7149eb9e30dee0d16

URL: https://github.com/llvm/llvm-project/commit/e4ce92245c96cea9492767d7149eb9e30dee0d16
DIFF: https://github.com/llvm/llvm-project/commit/e4ce92245c96cea9492767d7149eb9e30dee0d16.diff

LOG: [fir] Add Character helper

This patch is extracted from D111337. It introduces the
CharacterExprHelper, which helps deal with characters in FIR.

Reviewed By: schweitz, awarzynski

Differential Revision: https://reviews.llvm.org/D112140

Co-authored-by: Jean Perier <jperier at nvidia.com>
Co-authored-by: Eric Schweitz <eschweitz at nvidia.com>
Co-authored-by: V Donaldson <vdonaldson at nvidia.com>

Added: 
    flang/include/flang/Optimizer/Builder/Character.h
    flang/lib/Optimizer/Builder/Character.cpp
    flang/unittests/Optimizer/Builder/CharacterTest.cpp

Modified: 
    flang/include/flang/Optimizer/Builder/FIRBuilder.h
    flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
    flang/include/flang/Optimizer/Dialect/FIRType.h
    flang/lib/Optimizer/Builder/CMakeLists.txt
    flang/lib/Optimizer/Builder/FIRBuilder.cpp
    flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
    flang/unittests/Optimizer/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Builder/Character.h b/flang/include/flang/Optimizer/Builder/Character.h
new file mode 100644
index 0000000000000..610b42052f317
--- /dev/null
+++ b/flang/include/flang/Optimizer/Builder/Character.h
@@ -0,0 +1,192 @@
+//===-- Character.h -- lowering of characters -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
+#define FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
+
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+
+namespace fir::factory {
+
+/// Helper to facilitate lowering of CHARACTER in FIR.
+class CharacterExprHelper {
+public:
+  /// Constructor.
+  explicit CharacterExprHelper(FirOpBuilder &builder, mlir::Location loc)
+      : builder{builder}, loc{loc} {}
+  CharacterExprHelper(const CharacterExprHelper &) = delete;
+
+  /// Copy the \p count first characters of \p src into \p dest.
+  /// \p count can have any integer type.
+  void createCopy(const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
+                  mlir::Value count);
+
+  /// Set characters of \p str at position [\p lower, \p upper) to blanks.
+  /// \p lower and \p upper bounds are zero based.
+  /// If \p upper <= \p lower, no padding is done.
+  /// \p upper and \p lower can have any integer type.
+  void createPadding(const fir::CharBoxValue &str, mlir::Value lower,
+                     mlir::Value upper);
+
+  /// Create str(lb:ub), lower bounds must always be specified, upper
+  /// bound is optional.
+  fir::CharBoxValue createSubstring(const fir::CharBoxValue &str,
+                                    llvm::ArrayRef<mlir::Value> bounds);
+
+  /// Return blank character of given \p type !fir.char<kind>
+  mlir::Value createBlankConstant(fir::CharacterType type);
+
+  /// Lower \p lhs = \p rhs where \p lhs and \p rhs are scalar characters.
+  /// It handles cases where \p lhs and \p rhs may overlap.
+  void createAssign(const fir::ExtendedValue &lhs,
+                    const fir::ExtendedValue &rhs);
+
+  /// Create lhs // rhs in temp obtained with fir.alloca
+  fir::CharBoxValue createConcatenate(const fir::CharBoxValue &lhs,
+                                      const fir::CharBoxValue &rhs);
+
+  /// LEN_TRIM intrinsic.
+  mlir::Value createLenTrim(const fir::CharBoxValue &str);
+
+  /// Embox \p addr and \p len and return fir.boxchar.
+  /// Take care of type conversions before emboxing.
+  /// \p len is converted to the integer type for character lengths if needed.
+  mlir::Value createEmboxChar(mlir::Value addr, mlir::Value len);
+  /// Create a fir.boxchar for \p str. If \p str is not in memory, a temp is
+  /// allocated to create the fir.boxchar.
+  mlir::Value createEmbox(const fir::CharBoxValue &str);
+  /// Embox a string array. Note that the size/shape of the array is not
+  /// retrievable from the resulting mlir::Value.
+  mlir::Value createEmbox(const fir::CharArrayBoxValue &str);
+
+  /// Convert a character array to a scalar by reducing the extents into the
+  /// length. Will fail if called on a non-reference-like base.
+  fir::CharBoxValue toScalarCharacter(const fir::CharArrayBoxValue &);
+
+  /// Unbox \p boxchar into (fir.ref<fir.char<kind>>, character length type).
+  std::pair<mlir::Value, mlir::Value> createUnboxChar(mlir::Value boxChar);
+
+  /// Allocate a temp of fir::CharacterType type and length len.
+  /// Returns related fir.ref<fir.array<? x fir.char<kind>>>.
+  fir::CharBoxValue createCharacterTemp(mlir::Type type, mlir::Value len);
+
+  /// Allocate a temp of compile time constant length.
+  /// Returns related fir.ref<fir.array<len x fir.char<kind>>>.
+  fir::CharBoxValue createCharacterTemp(mlir::Type type, int len);
+
+  /// Create a temporary with the same kind, length, and value as source.
+  fir::CharBoxValue createTempFrom(const fir::ExtendedValue &source);
+
+  /// Return true if \p type is a character literal type (is
+  /// `fir.array<len x fir.char<kind>>`).
+  static bool isCharacterLiteral(mlir::Type type);
+
+  /// Return true if \p type is one of the following type
+  /// - fir.boxchar<kind>
+  /// - fir.ref<fir.char<kind,len>>
+  /// - fir.char<kind,len>
+  static bool isCharacterScalar(mlir::Type type);
+
+  /// Is the base type of this extended value fir.char<kind,len>
+  /// where len is not the unknown extent?
+  static bool hasConstantLengthInType(const fir::ExtendedValue &);
+
+  /// Extract the kind of a character type
+  static fir::KindTy getCharacterKind(mlir::Type type);
+
+  /// Extract the kind of a character or array of character type.
+  static fir::KindTy getCharacterOrSequenceKind(mlir::Type type);
+
+  /// Determine the base character type
+  static fir::CharacterType getCharacterType(mlir::Type type);
+  static fir::CharacterType getCharacterType(const fir::CharBoxValue &box);
+  static fir::CharacterType getCharacterType(mlir::Value str);
+
+  /// Create an extended value from a value of type:
+  /// - fir.boxchar<kind>
+  /// - fir.ref<fir.char<kind,len>>
+  /// - fir.char<kind,len>
+  /// or the array versions:
+  /// - fir.ref<fir.array<n x...x fir.char<kind,len>>>
+  /// - fir.array<n x...x fir.char<kind,len>>
+  ///
+  /// Does the heavy lifting of converting the value \p character (along with an
+  /// optional \p len value) to an extended value. If \p len is null, a length
+  /// value is extracted from \p character (or its type). This will produce an
+  /// error if it's not possible. The returned value is a CharBoxValue if \p
+  /// character is a scalar, otherwise it is a CharArrayBoxValue.
+  fir::ExtendedValue toExtendedValue(mlir::Value character,
+                                     mlir::Value len = {});
+
+  /// Is `type` a sequence (array) of CHARACTER type? Return true for any of the
+  /// following cases:
+  ///   - !fir.array<dim x ... x !fir.char<kind, len>>
+  ///   - !fir.ref<T>  where T is either of the first case
+  ///   - !fir.box<T>  where T is either of the first case
+  ///
+  /// In certain contexts, Fortran allows an array of CHARACTERs to be treated
+  /// as if it were one longer CHARACTER scalar, each element appended to the
+  /// previous.
+  static bool isArray(mlir::Type type);
+
+  /// Temporary helper to help migrating towards properties of
+  /// ExtendedValue containing characters.
+  /// Mainly, this ensures that characters are always CharArrayBoxValue,
+  /// CharBoxValue, or BoxValue and that the base address is not a boxchar.
+  /// Return the argument if this is not a character.
+  /// TODO: Create and propagate ExtendedValue according to properties listed
+  /// above instead of fixing it when needed.
+  fir::ExtendedValue cleanUpCharacterExtendedValue(const fir::ExtendedValue &);
+
+  /// Create fir.char<kind> singleton from \p code integer value.
+  mlir::Value createSingletonFromCode(mlir::Value code, int kind);
+  /// Returns integer value held in a character singleton.
+  mlir::Value extractCodeFromSingleton(mlir::Value singleton);
+
+  /// Create a value for the length of a character based on its memory reference
+  /// that may be a boxchar, box or !fir.[ptr|ref|heap]<fir.char<kind, len>>. If
+  /// the memref is a simple address and the length is not constant in type, the
+  /// returned length will be empty.
+  mlir::Value getLength(mlir::Value memref);
+
+  /// Compute length given a fir.box describing a character entity.
+  /// It adjusts the length from the number of bytes per the descriptor
+  /// to the number of characters per the Fortran KIND.
+  mlir::Value readLengthFromBox(mlir::Value box);
+
+private:
+  /// FIXME: the implementation also needs a clean-up now that
+  /// CharBoxValue are better propagated.
+  fir::CharBoxValue materializeValue(mlir::Value str);
+  mlir::Value getCharBoxBuffer(const fir::CharBoxValue &box);
+  mlir::Value createElementAddr(mlir::Value buffer, mlir::Value index);
+  mlir::Value createLoadCharAt(mlir::Value buff, mlir::Value index);
+  void createStoreCharAt(mlir::Value str, mlir::Value index, mlir::Value c);
+  void createLengthOneAssign(const fir::CharBoxValue &lhs,
+                             const fir::CharBoxValue &rhs);
+  void createAssign(const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs);
+  mlir::Value createBlankConstantCode(fir::CharacterType type);
+
+  FirOpBuilder &builder;
+  mlir::Location loc;
+};
+
+// FIXME: Move these to Optimizer
+mlir::FuncOp getLlvmMemcpy(FirOpBuilder &builder);
+mlir::FuncOp getLlvmMemmove(FirOpBuilder &builder);
+mlir::FuncOp getLlvmMemset(FirOpBuilder &builder);
+mlir::FuncOp getRealloc(FirOpBuilder &builder);
+
+} // namespace fir::factory
+
+#endif // FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H

diff  --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 4710e025419c5..bcd11425b41ab 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -47,9 +47,22 @@ class FirOpBuilder : public mlir::OpBuilder {
     return getRegion().getParentOfType<mlir::ModuleOp>();
   }
 
+  /// Get the current Function
+  mlir::FuncOp getFunction() {
+    return getRegion().getParentOfType<mlir::FuncOp>();
+  }
+
   /// Get a reference to the kind map.
   const fir::KindMapping &getKindMap() { return kindMap; }
 
+  /// Get the entry block of the current Function
+  mlir::Block *getEntryBlock() { return &getFunction().front(); }
+
+  /// Get the block for adding Allocas. If OpenMP is enabled then get the
+  /// alloca block from an Operation which can be Outlined. Otherwise
+  /// use the entry block of the current Function.
+  mlir::Block *getAllocaBlock();
+
   /// Safely create a reference type to the type `eleTy`.
   mlir::Type getRefType(mlir::Type eleTy);
 
@@ -91,6 +104,45 @@ class FirOpBuilder : public mlir::OpBuilder {
     return createRealConstant(loc, realType, 0u);
   }
 
+  /// Create a slot for a local on the stack. Besides the variable's type and
+  /// shape, it may be given name, pinned, or target attributes.
+  mlir::Value allocateLocal(mlir::Location loc, mlir::Type ty,
+                            llvm::StringRef uniqName, llvm::StringRef name,
+                            bool pinned, llvm::ArrayRef<mlir::Value> shape,
+                            llvm::ArrayRef<mlir::Value> lenParams,
+                            bool asTarget = false);
+  mlir::Value allocateLocal(mlir::Location loc, mlir::Type ty,
+                            llvm::StringRef uniqName, llvm::StringRef name,
+                            llvm::ArrayRef<mlir::Value> shape,
+                            llvm::ArrayRef<mlir::Value> lenParams,
+                            bool asTarget = false);
+
+  /// Create a temporary. A temp is allocated using `fir.alloca` and can be read
+  /// and written using `fir.load` and `fir.store`, resp.  The temporary can be
+  /// given a name via a front-end `Symbol` or a `StringRef`.
+  mlir::Value createTemporary(mlir::Location loc, mlir::Type type,
+                              llvm::StringRef name = {},
+                              mlir::ValueRange shape = {},
+                              mlir::ValueRange lenParams = {},
+                              llvm::ArrayRef<mlir::NamedAttribute> attrs = {});
+
+  /// Create an unnamed and untracked temporary on the stack.
+  mlir::Value createTemporary(mlir::Location loc, mlir::Type type,
+                              mlir::ValueRange shape) {
+    return createTemporary(loc, type, llvm::StringRef{}, shape);
+  }
+
+  mlir::Value createTemporary(mlir::Location loc, mlir::Type type,
+                              llvm::ArrayRef<mlir::NamedAttribute> attrs) {
+    return createTemporary(loc, type, llvm::StringRef{}, {}, {}, attrs);
+  }
+
+  mlir::Value createTemporary(mlir::Location loc, mlir::Type type,
+                              llvm::StringRef name,
+                              llvm::ArrayRef<mlir::NamedAttribute> attrs) {
+    return createTemporary(loc, type, name, {}, {}, attrs);
+  }
+
   /// Create a global value.
   fir::GlobalOp createGlobal(mlir::Location loc, mlir::Type type,
                              llvm::StringRef name,

diff  --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
index dcca1ab55ee0d..01f80c2bd10c4 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -64,6 +64,9 @@ constexpr llvm::StringRef getContiguousAttrName() { return "fir.contiguous"; }
 /// Attribute to mark Fortran entities with the OPTIONAL attribute.
 constexpr llvm::StringRef getOptionalAttrName() { return "fir.optional"; }
 
+/// Attribute to mark Fortran entities with the TARGET attribute.
+static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; }
+
 /// Tell if \p value is:
 ///   - a function argument that has attribute \p attributeName
 ///   - or, the result of fir.alloca/fir.allocamem op that has attribute \p

diff  --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h
index e0f0e29520390..f5a0f0081b80c 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRType.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRType.h
@@ -161,6 +161,12 @@ inline mlir::Type unwrapSequenceType(mlir::Type t) {
   return t;
 }
 
+inline mlir::Type unwrapRefType(mlir::Type t) {
+  if (auto eleTy = dyn_cast_ptrEleTy(t))
+    return eleTy;
+  return t;
+}
+
 #ifndef NDEBUG
 // !fir.ptr<X> and !fir.heap<X> where X is !fir.ptr, !fir.heap, or !fir.ref
 // is undefined and disallowed.

diff  --git a/flang/lib/Optimizer/Builder/CMakeLists.txt b/flang/lib/Optimizer/Builder/CMakeLists.txt
index f4aafeb94a8e4..afcd4d34a7836 100644
--- a/flang/lib/Optimizer/Builder/CMakeLists.txt
+++ b/flang/lib/Optimizer/Builder/CMakeLists.txt
@@ -1,6 +1,8 @@
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+
 add_flang_library(FIRBuilder
   BoxValue.cpp
+  Character.cpp
   DoLoopHelper.cpp
   FIRBuilder.cpp
 

diff  --git a/flang/lib/Optimizer/Builder/Character.cpp b/flang/lib/Optimizer/Builder/Character.cpp
new file mode 100644
index 0000000000000..7cd2b11f8cc4f
--- /dev/null
+++ b/flang/lib/Optimizer/Builder/Character.cpp
@@ -0,0 +1,725 @@
+//===-- Character.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/Character.h"
+#include "flang/Lower/Todo.h"
+#include "flang/Optimizer/Builder/DoLoopHelper.h"
+#include "llvm/Support/Debug.h"
+#include <optional>
+
+#define DEBUG_TYPE "flang-lower-character"
+
+//===----------------------------------------------------------------------===//
+// CharacterExprHelper implementation
+//===----------------------------------------------------------------------===//
+
+/// Unwrap base fir.char<kind,len> type.
+static fir::CharacterType recoverCharacterType(mlir::Type type) {
+  if (auto boxType = type.dyn_cast<fir::BoxCharType>())
+    return boxType.getEleTy();
+  while (true) {
+    type = fir::unwrapRefType(type);
+    if (auto boxTy = type.dyn_cast<fir::BoxType>())
+      type = boxTy.getEleTy();
+    else
+      break;
+  }
+  return fir::unwrapSequenceType(type).cast<fir::CharacterType>();
+}
+
+/// Get the base fir.char type of the scalar character type \p type.
+fir::CharacterType
+fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
+  assert(isCharacterScalar(type) && "expected scalar character");
+  return recoverCharacterType(type);
+}
+
+fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
+    const fir::CharBoxValue &box) {
+  return getCharacterType(box.getBuffer().getType());
+}
+
+fir::CharacterType
+fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
+  return getCharacterType(str.getType());
+}
+
+/// Determine the static size of the character. Returns the computed size, not
+/// an IR Value.
+static std::optional<fir::CharacterType::LenType>
+getCompileTimeLength(const fir::CharBoxValue &box) {
+  auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
+  if (len == fir::CharacterType::unknownLen())
+    return {};
+  return len;
+}
+
+/// Detect the precondition that the value `str` does not reside in memory. Such
+/// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
+LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
+  return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType());
+}
+
+/// Unwrap integer constant from mlir::Value.
+static llvm::Optional<std::int64_t> getIntIfConstant(mlir::Value value) {
+  if (auto *definingOp = value.getDefiningOp())
+    if (auto cst = mlir::dyn_cast<mlir::ConstantOp>(definingOp))
+      if (auto intAttr = cst.getValue().dyn_cast<mlir::IntegerAttr>())
+        return intAttr.getInt();
+  return {};
+}
+
+/// This is called only if `str` does not reside in memory. Such a bare string
+/// value will be converted into a memory-based temporary and an extended
+/// boxchar value returned.
+fir::CharBoxValue
+fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
+  assert(needToMaterialize(str));
+  auto ty = str.getType();
+  assert(isCharacterScalar(ty) && "expected scalar character");
+  auto charTy = ty.dyn_cast<fir::CharacterType>();
+  if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
+    LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
+    llvm_unreachable("must be a !fir.char<N> type");
+  }
+  auto len = builder.createIntegerConstant(
+      loc, builder.getCharacterLengthType(), charTy.getLen());
+  auto temp = builder.create<fir::AllocaOp>(loc, charTy);
+  builder.create<fir::StoreOp>(loc, str, temp);
+  LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
+                          << ", " << len << ")\n");
+  return {temp, len};
+}
+
+fir::ExtendedValue
+fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
+                                                   mlir::Value len) {
+  auto lenType = builder.getCharacterLengthType();
+  auto type = character.getType();
+  auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
+  auto resultLen = len;
+  llvm::SmallVector<mlir::Value> extents;
+
+  if (auto eleType = fir::dyn_cast_ptrEleTy(type))
+    type = eleType;
+
+  if (auto arrayType = type.dyn_cast<fir::SequenceType>()) {
+    type = arrayType.getEleTy();
+    auto indexType = builder.getIndexType();
+    for (auto extent : arrayType.getShape()) {
+      if (extent == fir::SequenceType::getUnknownExtent())
+        break;
+      extents.emplace_back(
+          builder.createIntegerConstant(loc, indexType, extent));
+    }
+    // Last extent might be missing in case of assumed-size. If more extents
+    // could not be deduced from type, that's an error (a fir.box should
+    // have been used in the interface).
+    if (extents.size() + 1 < arrayType.getShape().size())
+      mlir::emitError(loc, "cannot retrieve array extents from type");
+  }
+
+  if (auto charTy = type.dyn_cast<fir::CharacterType>()) {
+    if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
+      resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
+  } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) {
+    auto refType = builder.getRefType(boxCharType.getEleTy());
+    // If the embox is accessible, use its operand to avoid filling
+    // the generated fir with embox/unbox.
+    mlir::Value boxCharLen;
+    if (auto *definingOp = character.getDefiningOp()) {
+      if (auto box = dyn_cast<fir::EmboxCharOp>(definingOp)) {
+        base = box.memref();
+        boxCharLen = box.len();
+      }
+    }
+    if (!boxCharLen) {
+      auto unboxed =
+          builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
+      base = builder.createConvert(loc, refType, unboxed.getResult(0));
+      boxCharLen = unboxed.getResult(1);
+    }
+    if (!resultLen) {
+      resultLen = boxCharLen;
+    }
+  } else if (type.isa<fir::BoxType>()) {
+    mlir::emitError(loc, "descriptor or derived type not yet handled");
+  } else {
+    llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
+  }
+
+  if (!base) {
+    if (auto load =
+            mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
+      base = load.getOperand();
+    } else {
+      return materializeValue(fir::getBase(character));
+    }
+  }
+  if (!resultLen)
+    llvm::report_fatal_error("no dynamic length found for character");
+  if (!extents.empty())
+    return fir::CharArrayBoxValue{base, resultLen, extents};
+  return fir::CharBoxValue{base, resultLen};
+}
+
+static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
+  return fir::CharacterType::getSingleton(ctxt, kind);
+}
+
+mlir::Value
+fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
+  // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
+  // type)
+  auto charTy = recoverCharacterType(box.getBuffer().getType());
+  auto boxCharType =
+      fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
+  auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
+  mlir::Value buff = box.getBuffer();
+  // fir.boxchar requires a memory reference. Allocate temp if the character is
+  // not in memory.
+  if (!fir::isa_ref_type(buff.getType())) {
+    auto temp = builder.createTemporary(loc, buff.getType());
+    builder.create<fir::StoreOp>(loc, buff, temp);
+    buff = temp;
+  }
+  buff = builder.createConvert(loc, refType, buff);
+  // Convert in case the provided length is not of the integer type that must
+  // be used in boxchar.
+  auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
+                                   box.getLen());
+  return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
+}
+
+fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
+    const fir::CharArrayBoxValue &box) {
+  if (box.getBuffer().getType().isa<fir::PointerType>())
+    TODO(loc, "concatenating non contiguous character array into a scalar");
+
+  // TODO: add a fast path multiplying new length at compile time if the info is
+  // in the array type.
+  auto lenType = builder.getCharacterLengthType();
+  auto len = builder.createConvert(loc, lenType, box.getLen());
+  for (auto extent : box.getExtents())
+    len = builder.create<arith::MulIOp>(
+        loc, len, builder.createConvert(loc, lenType, extent));
+
+  // TODO: typeLen can be improved in compiled constant cases
+  // TODO: allow bare fir.array<> (no ref) conversion here ?
+  auto typeLen = fir::CharacterType::unknownLen();
+  auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
+  auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
+  auto type = fir::ReferenceType::get(charTy);
+  auto buffer = builder.createConvert(loc, type, box.getBuffer());
+  return {buffer, len};
+}
+
+mlir::Value fir::factory::CharacterExprHelper::createEmbox(
+    const fir::CharArrayBoxValue &box) {
+  // Use same embox as for scalar. It's losing the actual data size information
+  // (We do not multiply the length by the array size), but that is what Fortran
+  // call interfaces using boxchar expect.
+  return createEmbox(static_cast<const fir::CharBoxValue &>(box));
+}
+
+/// Get the address of the element at position \p index of the scalar character
+/// \p buffer.
+/// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
+/// unknown. \p index must have any integer type, and is zero based. The return
+/// value is a singleton address (!fir.ref<!fir.char<kind>>)
+mlir::Value
+fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
+                                                     mlir::Value index) {
+  // The only way to address an element of a fir.ref<char<kind, len>> is to cast
+  // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
+  auto bufferType = buffer.getType();
+  assert(fir::isa_ref_type(bufferType));
+  assert(isCharacterScalar(bufferType));
+  auto charTy = recoverCharacterType(bufferType);
+  auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
+  auto singleRefTy = builder.getRefType(singleTy);
+  auto extent = fir::SequenceType::getUnknownExtent();
+  if (charTy.getLen() != fir::CharacterType::unknownLen())
+    extent = charTy.getLen();
+  auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
+
+  auto coor = builder.createConvert(loc, coorTy, buffer);
+  auto i = builder.createConvert(loc, builder.getIndexType(), index);
+  return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
+}
+
+/// Load a character out of `buff` from offset `index`.
+/// `buff` must be a reference to memory.
+mlir::Value
+fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
+                                                    mlir::Value index) {
+  LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
+                          << buff.getType() << " at: " << index << '\n');
+  return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
+}
+
+/// Store the singleton character `c` to `str` at offset `index`.
+/// `str` must be a reference to memory.
+void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
+                                                          mlir::Value index,
+                                                          mlir::Value c) {
+  LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
+                          << " type: " << str.getType() << " at: " << index
+                          << '\n');
+  auto addr = createElementAddr(str, index);
+  builder.create<fir::StoreOp>(loc, c, addr);
+}
+
+// FIXME: this temp is useless... either fir.coordinate_of needs to
+// work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
+// character should never be loaded.
+// If this is a fir.array<>, allocate and store the value so that
+// fir.coordinate_of can be used on the value.
+mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
+    const fir::CharBoxValue &box) {
+  auto buff = box.getBuffer();
+  if (fir::isa_char(buff.getType())) {
+    auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
+    builder.create<fir::StoreOp>(loc, buff, newBuff);
+    return newBuff;
+  }
+  return buff;
+}
+
+/// Get the LLVM intrinsic for `memcpy`. Use the 64 bit version.
+mlir::FuncOp fir::factory::getLlvmMemcpy(fir::FirOpBuilder &builder) {
+  auto ptrTy = builder.getRefType(builder.getIntegerType(8));
+  llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
+                                        builder.getI1Type()};
+  auto memcpyTy =
+      mlir::FunctionType::get(builder.getContext(), args, llvm::None);
+  return builder.addNamedFunction(builder.getUnknownLoc(),
+                                  "llvm.memcpy.p0i8.p0i8.i64", memcpyTy);
+}
+
+/// Get the LLVM intrinsic for `memmove`. Use the 64 bit version.
+mlir::FuncOp fir::factory::getLlvmMemmove(fir::FirOpBuilder &builder) {
+  auto ptrTy = builder.getRefType(builder.getIntegerType(8));
+  llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
+                                        builder.getI1Type()};
+  auto memmoveTy =
+      mlir::FunctionType::get(builder.getContext(), args, llvm::None);
+  return builder.addNamedFunction(builder.getUnknownLoc(),
+                                  "llvm.memmove.p0i8.p0i8.i64", memmoveTy);
+}
+
+/// Get the LLVM intrinsic for `memset` (dest ptr, i8 value, i64 len, i1 vol).
+mlir::FuncOp fir::factory::getLlvmMemset(fir::FirOpBuilder &builder) {
+  auto i8Ty = builder.getIntegerType(8); // fill byte is an i8, not a pointer
+  llvm::SmallVector<mlir::Type> args = {builder.getRefType(i8Ty), i8Ty,
+                                        builder.getI64Type(),
+                                        builder.getI1Type()};
+  auto fnTy = mlir::FunctionType::get(builder.getContext(), args, llvm::None);
+  return builder.addNamedFunction(builder.getUnknownLoc(),
+                                  "llvm.memset.p0i8.i64", fnTy);
+}
+
+/// Get the standard `realloc` function.
+mlir::FuncOp fir::factory::getRealloc(fir::FirOpBuilder &builder) {
+  auto ptrTy = builder.getRefType(builder.getIntegerType(8));
+  llvm::SmallVector<mlir::Type> args = {ptrTy, builder.getI64Type()};
+  auto reallocTy = mlir::FunctionType::get(builder.getContext(), args, {ptrTy});
+  return builder.addNamedFunction(builder.getUnknownLoc(), "realloc",
+                                  reallocTy);
+}
+
+/// Copy `count` characters from `src` to `dest`. Note that the KIND indicates
+/// the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
+/// When both sides have the same KIND the copy is a call to the LLVM memmove
+/// intrinsic; otherwise a fir::CharConvertOp is emitted to convert between
+/// the two KINDs.
+void fir::factory::CharacterExprHelper::createCopy(
+    const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
+    mlir::Value count) {
+  auto srcBuffer = getCharBoxBuffer(src);
+  auto destBuffer = getCharBoxBuffer(dest);
+  LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
+             llvm::dbgs() << " to: "; dest.dump();
+             llvm::dbgs() << " count: " << count << '\n');
+  auto srcKind = getCharacterKind(src.getBuffer().getType());
+  auto destKind = getCharacterKind(dest.getBuffer().getType());
+
+  if (destKind != srcKind) {
+    // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
+    builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
+                                       dest.getBuffer());
+    return;
+  }
+
+  // The src and dest are the same KIND: use memmove to move the bits.
+  // We don't have to worry about overlapping ranges with memmove.
+  auto bytesPerChar = builder.getKindMap().getCharacterBitsize(srcKind) / 8;
+  auto i64Ty = builder.getI64Type();
+  auto charSize = builder.createIntegerConstant(loc, i64Ty, bytesPerChar);
+  auto count64 = builder.createConvert(loc, i64Ty, count);
+  auto byteCount = builder.create<arith::MulIOp>(loc, charSize, count64);
+  auto isVolatile = builder.createBool(loc, false);
+  auto memmoveFunc = getLlvmMemmove(builder);
+  // Cast both buffers to the pointer types expected by the intrinsic.
+  auto paramTys = memmoveFunc.getType().getInputs();
+  auto destPtr = builder.createConvert(loc, paramTys[0], destBuffer);
+  auto srcPtr = builder.createConvert(loc, paramTys[1], srcBuffer);
+  builder.create<fir::CallOp>(
+      loc, memmoveFunc,
+      mlir::ValueRange{destPtr, srcPtr, byteCount, isVolatile});
+}
+
+/// Store a blank character at every position of `str` from `lower` to `upper`.
+void fir::factory::CharacterExprHelper::createPadding(
+    const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
+  auto padChar = createBlankConstant(getCharacterType(str));
+  auto buffer = getCharBoxBuffer(str);
+  auto storeBlank = [&](fir::FirOpBuilder &, mlir::Value position) {
+    createStoreCharAt(buffer, position, padChar);
+  };
+  // The loop is always created: if upper < lower, no iteration will be
+  // executed.
+  fir::factory::DoLoopHelper loopHelper{builder, loc};
+  loopHelper.createLoop(lower, upper, storeBlank);
+}
+
+/// Allocate a character temporary of the KIND recovered from `type` and of
+/// length `len`. A compile-time constant `len` is encoded in the allocated
+/// type; otherwise `len` is passed as a length parameter of the allocation.
+fir::CharBoxValue
+fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
+                                                       mlir::Value len) {
+  auto charKind = recoverCharacterType(type).getFKind();
+  // Start from an unknown length and refine it when len is a constant.
+  auto lenInType = fir::CharacterType::unknownLen();
+  if (auto constantLen = getIntIfConstant(len))
+    lenInType = *constantLen;
+  auto tempTy =
+      fir::CharacterType::get(builder.getContext(), charKind, lenInType);
+  // Only a length that is not part of the type is given to the allocation.
+  llvm::SmallVector<mlir::Value> dynamicLen;
+  if (lenInType == fir::CharacterType::unknownLen())
+    dynamicLen.push_back(len);
+  auto tempAddr = builder.allocateLocal(loc, tempTy, "", ".chrtmp",
+                                        /*shape=*/llvm::None, dynamicLen);
+  return {tempAddr, len};
+}
+
+/// Create a character temporary with the length of `source` and initialize it
+/// with the content of `source`. It is a fatal error if `source` is not a
+/// fir::CharBoxValue.
+fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
+    const fir::ExtendedValue &source) {
+  const auto *srcBox = source.getCharBox();
+  if (!srcBox)
+    fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
+  auto srcLen = srcBox->getLen();
+  auto srcTy = srcBox->getBuffer().getType();
+  auto temp = createCharacterTemp(srcTy, srcLen);
+  if (!fir::isa_ref_type(srcTy)) {
+    // The source is a value, not an address: store it into the temporary.
+    auto tempRef = builder.createConvert(loc, builder.getRefType(srcTy),
+                                         temp.getBuffer());
+    builder.create<fir::StoreOp>(loc, srcBox->getBuffer(), tempRef);
+    return temp;
+  }
+  // The source lives in memory: copy its characters into the temporary.
+  createCopy(temp, *srcBox, srcLen);
+  return temp;
+}
+
+// Length one character assignment done with a single load and store instead
+// of a loop. The lhs address is cast to a reference to the loaded value type.
+void fir::factory::CharacterExprHelper::createLengthOneAssign(
+    const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
+  mlir::Value rhsChar = builder.create<fir::LoadOp>(loc, rhs.getBuffer());
+  auto lhsRefTy = builder.getRefType(rhsChar.getType());
+  auto lhsAddr = builder.createConvert(loc, lhsRefTy, lhs.getBuffer());
+  builder.create<fir::StoreOp>(loc, rhsChar, lhsAddr);
+}
+
+/// Returns the minimum of the integer mlir::Values \p a and \p b, using a
+/// signed comparison.
+mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
+                   mlir::Value a, mlir::Value b) {
+  mlir::Value aIsSmaller =
+      builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, a, b);
+  return builder.create<mlir::SelectOp>(loc, aIsSmaller, a, b);
+}
+
+/// Assign the scalar character `rhs` to `lhs`. The minimum of both lengths is
+/// copied, and the remainder of `lhs`, if any, is padded with blanks.
+void fir::factory::CharacterExprHelper::createAssign(
+    const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
+  auto rhsConstLen = getCompileTimeLength(rhs);
+  auto lhsConstLen = getCompileTimeLength(lhs);
+  const bool sameConstLen =
+      lhsConstLen && rhsConstLen && *lhsConstLen == *rhsConstLen;
+
+  if (sameConstLen) {
+    // Lengths are known to match: no min computation, no padding.
+    if (*lhsConstLen == 1)
+      createLengthOneAssign(lhs, rhs);
+    else
+      createCopy(lhs, rhs, lhs.getLen());
+    return;
+  }
+
+  // Copy the minimum of the lhs and rhs lengths.
+  auto idxTy = builder.getIndexType();
+  auto lhsLenIdx = builder.createConvert(loc, idxTy, lhs.getLen());
+  auto rhsLenIdx = builder.createConvert(loc, idxTy, rhs.getLen());
+  auto charsToCopy = genMin(builder, loc, lhsLenIdx, rhsLenIdx);
+  createCopy(lhs, rhs, charsToCopy);
+
+  // Pad the lhs from the end of the copy up to its last position
+  // (lhs length - 1).
+  auto cstOne = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
+  auto lastPos = builder.create<arith::SubIOp>(loc, lhs.getLen(), cstOne);
+  createPadding(lhs, charsToCopy, lastPos);
+}
+
+/// Concatenate `lhs` and `rhs` into a new character temporary whose length is
+/// the sum of both lengths. The lhs part is copied with createCopy, the rhs
+/// part is copied character by character in a loop.
+fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
+    const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
+  auto leftLen = builder.createConvert(loc, builder.getCharacterLengthType(),
+                                       lhs.getLen());
+  auto rightLen = builder.createConvert(loc, builder.getCharacterLengthType(),
+                                        rhs.getLen());
+  mlir::Value totalLen = builder.create<arith::AddIOp>(loc, leftLen, rightLen);
+  auto result = createCharacterTemp(getCharacterType(rhs), totalLen);
+  createCopy(result, lhs, leftLen);
+
+  // The rhs characters go to positions [lhs length, total length - 1].
+  auto step = builder.createIntegerConstant(loc, totalLen.getType(), 1);
+  auto lastPos = builder.create<arith::SubIOp>(loc, totalLen, step);
+  auto firstPos = builder.createConvert(loc, builder.getIndexType(), leftLen);
+  auto rhsBuffer = getCharBoxBuffer(rhs);
+  auto resultBuffer = getCharBoxBuffer(result);
+  auto copyRhsChar = [&](fir::FirOpBuilder &bldr, mlir::Value position) {
+    // Position in the rhs is the result position shifted by the lhs length.
+    auto rhsPos = bldr.create<arith::SubIOp>(loc, position, firstPos);
+    auto rhsChar = createLoadCharAt(rhsBuffer, rhsPos);
+    createStoreCharAt(resultBuffer, position, rhsChar);
+  };
+  fir::factory::DoLoopHelper{builder, loc}.createLoop(firstPos, lastPos, step,
+                                                      copyRhsChar);
+  return result;
+}
+
+/// Create the substring of `box` delimited by the one based `bounds`.
+/// `bounds` holds the lower bound, optionally followed by the upper bound;
+/// when the upper bound is absent the substring extends to the end of `box`.
+/// The result address points at the lower bound character and has an unknown
+/// length character type. The result length is clamped to zero when the
+/// bounds are reversed. An error is emitted and a null CharBoxValue is
+/// returned if `bounds` does not contain one or two values.
+fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
+    const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
+  // Constants need to be materialized in memory to use fir.coordinate_of.
+  auto nbounds = bounds.size();
+  if (nbounds < 1 || nbounds > 2) {
+    mlir::emitError(loc, "Incorrect number of bounds in substring");
+    return {mlir::Value{}, mlir::Value{}};
+  }
+  mlir::SmallVector<mlir::Value> castBounds;
+  // Convert bounds to length type to do safe arithmetic on it.
+  for (auto bound : bounds)
+    castBounds.push_back(
+        builder.createConvert(loc, builder.getCharacterLengthType(), bound));
+  auto lowerBound = castBounds[0];
+  // FIR CoordinateOp is zero based but Fortran substring are one based.
+  auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
+  auto offset = builder.create<arith::SubIOp>(loc, lowerBound, one).getResult();
+  auto addr = createElementAddr(box.getBuffer(), offset);
+  auto kind = getCharacterKind(box.getBuffer().getType());
+  auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
+  auto resultType = builder.getRefType(charTy);
+  auto substringRef = builder.createConvert(loc, resultType, addr);
+
+  // Compute the length: (upper - lower) + 1.
+  mlir::Value substringLen;
+  if (nbounds < 2) {
+    // No upper bound: use the length of the string. It is converted to the
+    // type of the bounds first so that both operands of the subtraction have
+    // the same type, whatever the type of the length held by the box.
+    auto boxLen =
+        builder.createConvert(loc, lowerBound.getType(), box.getLen());
+    substringLen = builder.create<arith::SubIOp>(loc, boxLen, lowerBound);
+  } else {
+    substringLen =
+        builder.create<arith::SubIOp>(loc, castBounds[1], lowerBound);
+  }
+  substringLen = builder.create<arith::AddIOp>(loc, substringLen, one);
+
+  // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
+  auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
+  auto cdt = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+                                           substringLen, zero);
+  substringLen = builder.create<mlir::SelectOp>(loc, cdt, zero, substringLen);
+
+  return {substringRef, substringLen};
+}
+
+/// Generate inlined code computing the length of `str` without its trailing
+/// blanks. The characters are scanned from the last one down to the first one
+/// while they compare equal to the blank code. The result is zero when the
+/// scan ends with its "is blank" flag still set (all blanks, or no iteration),
+/// and is converted to the character length type.
+mlir::Value
+fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
+  // Note: a runtime implementation of LEN_TRIM should also be available at
+  // some point. For now use an inlined implementation.
+  auto idxTy = builder.getIndexType();
+  auto strLen = builder.createConvert(loc, idxTy, str.getLen());
+  auto cstOne = builder.createIntegerConstant(loc, idxTy, 1);
+  auto cstMinusOne = builder.createIntegerConstant(loc, idxTy, -1);
+  auto cstZero = builder.createIntegerConstant(loc, idxTy, 0);
+  auto cstTrue = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
+  auto blankCode = createBlankConstantCode(getCharacterType(str));
+  mlir::Value lastPos = builder.create<arith::SubIOp>(loc, strLen, cstOne);
+
+  // Backward scan from the last position to zero; it carries the "is blank"
+  // flag and the position that was last examined.
+  auto scan =
+      builder.create<fir::IterWhileOp>(loc, lastPos, cstZero, cstMinusOne,
+                                       cstTrue,
+                                       /*returnFinalCount=*/false, lastPos);
+  auto savedInsPt = builder.saveInsertionPoint();
+  builder.setInsertionPointToStart(scan.getBody());
+  auto position = scan.getInductionVar();
+  // Loop body: load the character code at the current position and compare it
+  // with blank to find the first non-blank from the right.
+  auto buffer = getCharBoxBuffer(str);
+  auto charAddr = createElementAddr(buffer, position);
+  auto codeRefTy = builder.getRefType(blankCode.getType());
+  auto codeAddr = builder.createConvert(loc, codeRefTy, charAddr);
+  mlir::Value code = builder.create<fir::LoadOp>(loc, codeAddr);
+  mlir::Value isBlank = builder.create<arith::CmpIOp>(
+      loc, arith::CmpIPredicate::eq, blankCode, code);
+  llvm::SmallVector<mlir::Value> bodyResults = {isBlank, position};
+  builder.create<fir::ResultOp>(loc, bodyResults);
+  builder.restoreInsertionPoint(savedInsPt);
+
+  // Compute length after iteration (zero if all blanks)
+  mlir::Value lenIfNonBlank =
+      builder.create<arith::AddIOp>(loc, scan.getResult(1), cstOne);
+  auto trimmedLen = builder.create<mlir::SelectOp>(loc, scan.getResult(0),
+                                                   cstZero, lenIfNonBlank);
+  return builder.createConvert(loc, builder.getCharacterLengthType(),
+                               trimmedLen);
+}
+
+/// Allocate a character temporary of the KIND recovered from `type` with the
+/// compile-time length `len`, which must not be negative (zero is accepted).
+/// The length is encoded in the allocated type and also returned as a constant
+/// of the character length type.
+fir::CharBoxValue
+fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
+                                                       int len) {
+  assert(len >= 0 && "expected positive length");
+  auto charKind = recoverCharacterType(type).getFKind();
+  auto tempTy = fir::CharacterType::get(builder.getContext(), charKind, len);
+  mlir::Value tempAddr = builder.create<fir::AllocaOp>(loc, tempTy);
+  auto lenTy = builder.getCharacterLengthType();
+  auto lenValue = builder.createIntegerConstant(loc, lenTy, len);
+  return {tempAddr, lenValue};
+}
+
+// Returns an integer constant holding the code for blank. The integer has the
+// same bit size as a character of the given type. Blank has the ascii space
+// code for all kinds.
+mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
+    fir::CharacterType type) {
+  auto codeTy = builder.getIntegerType(
+      builder.getKindMap().getCharacterBitsize(type.getFKind()));
+  return builder.createIntegerConstant(loc, codeTy, ' ');
+}
+
+/// Returns a length one character value of the KIND of `type` holding a blank.
+mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
+    fir::CharacterType type) {
+  auto blankCode = createBlankConstantCode(type);
+  return createSingletonFromCode(blankCode, type.getFKind());
+}
+
+/// Assign `rhs` to `lhs` when both are fir::CharBoxValue. Any other
+/// combination is not implemented yet.
+void fir::factory::CharacterExprHelper::createAssign(
+    const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
+  auto *from = rhs.getBoxOf<fir::CharBoxValue>();
+  // The lhs is only inspected when the rhs is a scalar character.
+  auto *to = from ? lhs.getBoxOf<fir::CharBoxValue>() : nullptr;
+  if (to) {
+    createAssign(*to, *from);
+    return;
+  }
+  // Note that it is not certain that the array aspect should be handled by
+  // this utility.
+  TODO(loc, "character array assignment");
+}
+
+/// Embox the character described by the address `addr` and the length `len`.
+mlir::Value
+fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
+                                                   mlir::Value len) {
+  fir::CharBoxValue charBox{addr, len};
+  return createEmbox(charBox);
+}
+
+std::pair<mlir::Value, mlir::Value>
+fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
+  using T = std::pair<mlir::Value, mlir::Value>;
+  return toExtendedValue(boxChar).match(
+      [](const fir::CharBoxValue &b) -> T {
+        return {b.getBuffer(), b.getLen()};
+      },
+      [](const fir::CharArrayBoxValue &b) -> T {
+        return {b.getBuffer(), b.getLen()};
+      },
+      [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
+}
+
+/// Returns true if `type` is a one dimensional sequence whose element type is
+/// a character type.
+bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
+  auto seqType = type.dyn_cast<fir::SequenceType>();
+  if (!seqType)
+    return false;
+  if (seqType.getShape().size() != 1)
+    return false;
+  return fir::isa_char(seqType.getEleTy());
+}
+
+/// Returns true if `type` is a fir::BoxCharType, or is a character type that
+/// is not a sequence once references and a fir::BoxType are looked through.
+bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
+  if (type.isa<fir::BoxCharType>())
+    return true;
+  // Look through: reference, then box, then reference again.
+  auto eleTy = fir::unwrapRefType(type);
+  if (auto boxTy = eleTy.dyn_cast<fir::BoxType>())
+    eleTy = boxTy.getEleTy();
+  eleTy = fir::unwrapRefType(eleTy);
+  if (eleTy.isa<fir::SequenceType>())
+    return false;
+  return fir::isa_char(eleTy);
+}
+
+/// Returns the KIND of the scalar character type `type`. Asserts that `type`
+/// is a scalar character.
+fir::KindTy
+fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
+  assert(isCharacterScalar(type) && "expected scalar character");
+  auto charTy = recoverCharacterType(type);
+  return charTy.getFKind();
+}
+
+/// Returns the KIND of the character type recovered from `type`, without
+/// requiring `type` to be a scalar character.
+fir::KindTy
+fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
+  auto charTy = recoverCharacterType(type);
+  return charTy.getFKind();
+}
+
+/// Returns true if `type` is not a scalar character according to
+/// isCharacterScalar.
+bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
+  const bool isScalar = isCharacterScalar(type);
+  return !isScalar;
+}
+
+/// Returns true if the character type recovered from the type of the base of
+/// `exv` has a constant length.
+bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
+    const fir::ExtendedValue &exv) {
+  auto baseTy = fir::getBase(exv).getType();
+  return recoverCharacterType(baseTy).hasConstantLen();
+}
+
+/// Build a length one character value of the given `kind` from the integer
+/// `code`. The code is first converted to the integer type that has the bit
+/// size of a character of that kind.
+mlir::Value
+fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
+                                                           int kind) {
+  auto singletonTy = fir::CharacterType::get(builder.getContext(), kind, 1);
+  auto codeTy =
+      builder.getIntegerType(builder.getKindMap().getCharacterBitsize(kind));
+  auto codeValue = builder.createConvert(loc, codeTy, code);
+  // Insert the code at position zero of an undefined singleton.
+  auto undefChar = builder.create<fir::UndefOp>(loc, singletonTy);
+  auto posZero = builder.createIntegerConstant(loc, builder.getIndexType(), 0);
+  return builder.create<fir::InsertValueOp>(loc, singletonTy, undefChar,
+                                            codeValue, posZero);
+}
+
+/// Extract the integer code of the length one character value `singleton`.
+/// The result has the integer type with the bit size of the character kind.
+mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
+    mlir::Value singleton) {
+  auto charTy = getCharacterType(singleton);
+  assert(charTy.getLen() == 1);
+  auto codeTy = builder.getIntegerType(
+      builder.getKindMap().getCharacterBitsize(charTy.getFKind()));
+  auto posZero = builder.createIntegerConstant(loc, builder.getIndexType(), 0);
+  return builder.create<fir::ExtractValueOp>(loc, codeTy, singleton, posZero);
+}
+
+/// Read the character length from `box`: the element size of the box divided
+/// by the number of bytes per character when a character is wider than one
+/// byte.
+mlir::Value
+fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
+  auto lenTy = builder.getCharacterLengthType();
+  mlir::Value eleSize = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
+  auto charKind = recoverCharacterType(box.getType()).getFKind();
+  auto bytesPerChar = builder.getKindMap().getCharacterBitsize(charKind) / 8;
+  // For one byte characters the element size is the length.
+  if (!(bytesPerChar > 1))
+    return eleSize;
+  auto divisor = builder.createIntegerConstant(loc, lenTy, bytesPerChar);
+  return builder.create<arith::DivSIOp>(loc, eleSize, divisor);
+}
+
+/// Returns the length of the character `memref`: a constant when the length
+/// is part of the type, otherwise a value read from the fir::BoxType or
+/// fir::BoxCharType. Returns a null value if the length cannot be deduced
+/// from `memref`.
+mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
+  auto valueTy = memref.getType();
+  auto charTy = recoverCharacterType(valueTy);
+  assert(charTy && "must be a character type");
+  if (charTy.hasConstantLen()) {
+    auto lenTy = builder.getCharacterLengthType();
+    return builder.createIntegerConstant(loc, lenTy, charTy.getLen());
+  }
+  if (valueTy.isa<fir::BoxType>())
+    return readLengthFromBox(memref);
+  if (valueTy.isa<fir::BoxCharType>())
+    return createUnboxChar(memref).second;
+  // Length cannot be deduced from memref.
+  return mlir::Value{};
+}

diff  --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index ab060af114771..4cd74e5ee60f5 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -11,6 +11,7 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MD5.h"
@@ -112,6 +113,113 @@ mlir::Value fir::FirOpBuilder::createRealConstant(mlir::Location loc,
   llvm_unreachable("should use builtin floating-point type");
 }
 
+/// Returns the values of `shape` for the dimensions whose extent is unknown
+/// in the sequence type `type`. Returns an empty vector if `shape` is empty
+/// or `type` is not a sequence type.
+static llvm::SmallVector<mlir::Value>
+elideExtentsAlreadyInType(mlir::Type type, mlir::ValueRange shape) {
+  auto seqTy = type.dyn_cast<fir::SequenceType>();
+  if (!seqTy || shape.empty())
+    return {};
+  // One extent value is expected per dimension of the type.
+  assert(shape.size() == seqTy.getDimension());
+  auto extentsInType = seqTy.getShape();
+  const unsigned rank = seqTy.getDimension();
+  llvm::SmallVector<mlir::Value> dynamicExtents;
+  for (unsigned dim = 0; dim < rank; ++dim) {
+    // Constant dimensions are elided.
+    if (extentsInType[dim] != fir::SequenceType::getUnknownExtent())
+      continue;
+    dynamicExtents.push_back(shape[dim]);
+  }
+  return dynamicExtents;
+}
+
+/// Returns `lenParams` if `type`, or its element type when `type` is a
+/// sequence type, has a dynamic size. Returns an empty vector otherwise.
+static llvm::SmallVector<mlir::Value>
+elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams) {
+  if (lenParams.empty())
+    return {};
+  auto eleTy = type;
+  if (auto seqTy = type.dyn_cast<fir::SequenceType>())
+    eleTy = seqTy.getEleTy();
+  if (!fir::hasDynamicSize(eleTy))
+    return {};
+  return lenParams;
+}
+
+/// Allocate a local variable.
+/// A local variable ought to have a name in the source code.
+/// Extents and length parameters that are already part of `ty` are not passed
+/// to the allocation. The fir.alloca is built with `uniqName` and `name` when
+/// `name` is not empty, with `uniqName` only when just `name` is empty, and
+/// without any name when both are empty.
+mlir::Value fir::FirOpBuilder::allocateLocal(
+    mlir::Location loc, mlir::Type ty, llvm::StringRef uniqName,
+    llvm::StringRef name, bool pinned, llvm::ArrayRef<mlir::Value> shape,
+    llvm::ArrayRef<mlir::Value> lenParams, bool asTarget) {
+  llvm::SmallVector<mlir::Value> dynamicExtents =
+      elideExtentsAlreadyInType(ty, shape);
+  llvm::SmallVector<mlir::Value> dynamicLenParams =
+      elideLengthsAlreadyInType(ty, lenParams);
+  // Convert the shape extents to `index`, as needed.
+  auto idxTy = getIndexType();
+  llvm::SmallVector<mlir::Value> indices;
+  for (mlir::Value extent : dynamicExtents)
+    indices.push_back(createConvert(loc, idxTy, extent));
+  // Add a target attribute, if needed.
+  llvm::SmallVector<mlir::NamedAttribute> attrs;
+  if (asTarget)
+    attrs.emplace_back(
+        mlir::Identifier::get(fir::getTargetAttrName(), getContext()),
+        getUnitAttr());
+  // Create the local variable.
+  if (!name.empty())
+    return create<fir::AllocaOp>(loc, ty, uniqName, name, pinned,
+                                 dynamicLenParams, indices, attrs);
+  if (!uniqName.empty())
+    return create<fir::AllocaOp>(loc, ty, uniqName, pinned, dynamicLenParams,
+                                 indices, attrs);
+  return create<fir::AllocaOp>(loc, ty, pinned, dynamicLenParams, indices,
+                               attrs);
+}
+
+/// Allocate a local variable that is not pinned. Forwards to the overload
+/// taking a `pinned` flag.
+mlir::Value fir::FirOpBuilder::allocateLocal(
+    mlir::Location loc, mlir::Type ty, llvm::StringRef uniqName,
+    llvm::StringRef name, llvm::ArrayRef<mlir::Value> shape,
+    llvm::ArrayRef<mlir::Value> lenParams, bool asTarget) {
+  const bool pinned = false;
+  return allocateLocal(loc, ty, uniqName, name, pinned, shape, lenParams,
+                       asTarget);
+}
+
+/// Get the block for adding Allocas. This is currently always the entry
+/// block.
+mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
+  // Disabled for now: use the alloca block of an enclosing outlineable OpenMP
+  // operation when there is one.
+  // auto iface =
+  //     getRegion().getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
+  // return iface ? iface.getAllocaBlock() : getEntryBlock();
+  auto *entryBlock = getEntryBlock();
+  return entryBlock;
+}
+
+/// Create a temporary variable on the stack. Anonymous temporaries have no
+/// `name` value. Temporaries do not require a uniqued name.
+/// When the allocation needs no dynamic extent and no dynamic length, it is
+/// created at the start of the alloca block and the insertion point is
+/// restored afterwards. `type` cannot be a reference type.
+mlir::Value
+fir::FirOpBuilder::createTemporary(mlir::Location loc, mlir::Type type,
+                                   llvm::StringRef name, mlir::ValueRange shape,
+                                   mlir::ValueRange lenParams,
+                                   llvm::ArrayRef<mlir::NamedAttribute> attrs) {
+  assert(!type.isa<fir::ReferenceType>() && "cannot be a reference");
+  llvm::SmallVector<mlir::Value> extents =
+      elideExtentsAlreadyInType(type, shape);
+  llvm::SmallVector<mlir::Value> lengths =
+      elideLengthsAlreadyInType(type, lenParams);
+
+  // Without dynamic operands, the alloca can be hoisted.
+  const bool canHoist = extents.empty() && lengths.empty();
+  InsertPoint savedInsPt;
+  if (canHoist) {
+    savedInsPt = saveInsertionPoint();
+    setInsertionPointToStart(getAllocaBlock());
+  }
+
+  // If the alloca is inside an OpenMP Op which will be outlined then pin the
+  // alloca here.
+  const bool pinned =
+      getRegion().getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
+  auto temp =
+      create<fir::AllocaOp>(loc, type, /*unique_name=*/llvm::StringRef{}, name,
+                            pinned, lengths, extents, attrs);
+  if (canHoist)
+    restoreInsertionPoint(savedInsPt);
+  return temp;
+}
+
 /// Create a global variable in the (read-only) data section. A global variable
 /// must have a unique name to identify and reference it.
 fir::GlobalOp

diff  --git a/flang/unittests/Optimizer/Builder/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/CharacterTest.cpp
new file mode 100644
index 0000000000000..3c0a859fd2cea
--- /dev/null
+++ b/flang/unittests/Optimizer/Builder/CharacterTest.cpp
@@ -0,0 +1,100 @@
+//===- CharacterTest.cpp -- CharacterExprHelper unit tests ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/Character.h"
+#include "gtest/gtest.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Support/InitFIR.h"
+#include "flang/Optimizer/Support/KindMapping.h"
+
+/// Test fixture providing a fir::FirOpBuilder over a module that contains a
+/// single dummy function.
+struct CharacterTest : public testing::Test {
+public:
+  void SetUp() override {
+    llvm::ArrayRef<fir::KindTy> defs;
+    fir::KindMapping kindMap(&context, defs);
+    mlir::OpBuilder builder(&context);
+    auto loc = builder.getUnknownLoc();
+
+    // Set up a Module with a dummy function operation inside.
+    // Set the insertion point in the function entry block.
+    mlir::ModuleOp mod = builder.create<mlir::ModuleOp>(loc);
+    mlir::FuncOp func = mlir::FuncOp::create(
+        loc, "func1", builder.getFunctionType(llvm::None, llvm::None));
+    auto *entryBlock = func.addEntryBlock();
+    // Attach the function to the module (not the module to itself), so that
+    // the operations created by the tests live inside the module.
+    mod.push_back(func);
+    builder.setInsertionPointToStart(entryBlock);
+
+    fir::support::loadDialects(context);
+    firBuilder = std::make_unique<fir::FirOpBuilder>(mod, kindMap);
+  }
+
+  /// Returns the builder created by SetUp().
+  fir::FirOpBuilder &getBuilder() { return *firBuilder; }
+
+  mlir::MLIRContext context;
+  std::unique_ptr<fir::FirOpBuilder> firBuilder;
+};
+
+/// Check that `value` is defined by a ConstantOp of type `ty` holding the
+/// integer `v`. The check stops at the first structural mismatch, so a wrong
+/// defining operation or attribute kind is reported as a test failure instead
+/// of dereferencing a null operation or attribute.
+static void checkIntegerConstant(mlir::Value value, mlir::Type ty, int64_t v) {
+  auto *defOp = value.getDefiningOp();
+  ASSERT_TRUE(defOp != nullptr);
+  ASSERT_TRUE(mlir::isa<ConstantOp>(defOp));
+  auto cstOp = dyn_cast<ConstantOp>(defOp);
+  EXPECT_EQ(ty, cstOp.getType());
+  auto valueAttr = cstOp.getValue().dyn_cast_or_null<IntegerAttr>();
+  ASSERT_TRUE(static_cast<bool>(valueAttr));
+  EXPECT_EQ(v, valueAttr.getInt());
+}
+
+// Check the type queries of CharacterExprHelper on a string literal.
+TEST_F(CharacterTest, smallUtilityFunctions) {
+  auto builder = getBuilder();
+  auto loc = builder.getUnknownLoc();
+  llvm::StringRef literalText("onestringliteral");
+  auto literal = fir::factory::createStringLiteral(builder, loc, literalText);
+  using Helper = fir::factory::CharacterExprHelper;
+  EXPECT_TRUE(Helper::hasConstantLengthInType(literal));
+  auto literalTy = literal.getCharBox()->getAddr().getType();
+  EXPECT_TRUE(Helper::isCharacterScalar(literalTy));
+  // Both kind queries must report the default character kind.
+  EXPECT_EQ(builder.getKindMap().defaultCharacterKind(),
+      Helper::getCharacterKind(literalTy));
+  EXPECT_EQ(builder.getKindMap().defaultCharacterKind(),
+      Helper::getCharacterOrSequenceKind(literalTy));
+}
+
+// Check that the length of a concatenation is the sum of two constants: the
+// length of the first operand (16) plus the length of the second one (17).
+TEST_F(CharacterTest, createConcatenate) {
+  auto builder = getBuilder();
+  auto loc = builder.getUnknownLoc();
+  auto charHelper = fir::factory::CharacterExprHelper(builder, loc);
+  llvm::StringRef text17("rightsideofconcat");
+  llvm::StringRef text16("leftsideofconcat");
+  auto literal17 = fir::factory::createStringLiteral(builder, loc, text17);
+  auto literal16 = fir::factory::createStringLiteral(builder, loc, text16);
+  // The 16 character literal is the left operand of the concatenation.
+  auto concat = charHelper.createConcatenate(
+      *literal16.getCharBox(), *literal17.getCharBox());
+  auto *lenDefOp = concat.getLen().getDefiningOp();
+  EXPECT_TRUE(mlir::isa<arith::AddIOp>(lenDefOp));
+  auto addOp = dyn_cast<arith::AddIOp>(lenDefOp);
+  EXPECT_TRUE(mlir::isa<ConstantOp>(addOp.lhs().getDefiningOp()));
+  auto leftLenOp = dyn_cast<ConstantOp>(addOp.lhs().getDefiningOp());
+  EXPECT_TRUE(mlir::isa<ConstantOp>(addOp.rhs().getDefiningOp()));
+  auto rightLenOp = dyn_cast<ConstantOp>(addOp.rhs().getDefiningOp());
+  checkIntegerConstant(leftLenOp, builder.getCharacterLengthType(), 16);
+  checkIntegerConstant(rightLenOp, builder.getCharacterLengthType(), 17);
+}
+
+// Check that a substring with two bounds is a scalar character whose length
+// is not a constant of its type.
+TEST_F(CharacterTest, createSubstring) {
+  auto builder = getBuilder();
+  auto loc = builder.getUnknownLoc();
+  auto charHelper = fir::factory::CharacterExprHelper(builder, loc);
+  llvm::StringRef text("a dummy string to test substring");
+  auto literal = fir::factory::createStringLiteral(builder, loc, text);
+  auto i64Ty = builder.getI64Type();
+  auto lower = builder.createIntegerConstant(loc, i64Ty, 18);
+  auto upper = builder.createIntegerConstant(loc, builder.getI64Type(), 22);
+  auto substr =
+      charHelper.createSubstring(*literal.getCharBox(), {lower, upper});
+  EXPECT_FALSE(
+      fir::factory::CharacterExprHelper::hasConstantLengthInType(substr));
+  EXPECT_FALSE(charHelper.getCharacterType(substr).hasConstantLen());
+  EXPECT_FALSE(fir::factory::CharacterExprHelper::isArray(
+      charHelper.getCharacterType(substr)));
+}

diff  --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
index 2942477a5c2c6..b286d6add12a3 100644
--- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
+++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
@@ -318,3 +318,20 @@ TEST_F(FIRBuilderTest, createStringLiteral) {
     EXPECT_EQ(strValue, stringLit.getValue().dyn_cast<StringAttr>().getValue());
   }
 }
+
+// Check that allocateLocal with only a name creates a fir.alloca of the
+// requested type that carries the name as bindc_name, has no uniq_name, is
+// not pinned, and has neither type parameters nor shape operands.
+TEST_F(FIRBuilderTest, allocateLocal) {
+  auto builder = getBuilder();
+  auto loc = builder.getUnknownLoc();
+  llvm::StringRef localName = "var1";
+  auto local = builder.allocateLocal(
+      loc, builder.getI64Type(), "", localName, {}, {}, false);
+  auto *defOp = local.getDefiningOp();
+  EXPECT_TRUE(mlir::isa<fir::AllocaOp>(defOp));
+  auto alloca = dyn_cast<fir::AllocaOp>(defOp);
+  EXPECT_EQ(builder.getI64Type(), alloca.in_type());
+  EXPECT_TRUE(alloca.bindc_name().hasValue());
+  EXPECT_EQ(localName, alloca.bindc_name().getValue());
+  EXPECT_FALSE(alloca.uniq_name().hasValue());
+  EXPECT_FALSE(alloca.pinned());
+  EXPECT_EQ(0u, alloca.typeparams().size());
+  EXPECT_EQ(0u, alloca.shape().size());
+}

diff  --git a/flang/unittests/Optimizer/CMakeLists.txt b/flang/unittests/Optimizer/CMakeLists.txt
index a8168fd4a3340..8d280e936d244 100644
--- a/flang/unittests/Optimizer/CMakeLists.txt
+++ b/flang/unittests/Optimizer/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LIBS
 )
 
 add_flang_unittest(FlangOptimizerTests
+  Builder/CharacterTest.cpp
   Builder/DoLoopHelperTest.cpp
   Builder/FIRBuilderTest.cpp
   FIRContextTest.cpp


        


More information about the flang-commits mailing list