[flang-commits] [flang] d37250c - [flang][runtime] Fixes for element size calculation.

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Thu Sep 22 10:13:01 PDT 2022


Author: Slava Zakharin
Date: 2022-09-22T10:10:42-07:00
New Revision: d37250c9db48d12b6ba66e10a2ccaf8e3e66b547

URL: https://github.com/llvm/llvm-project/commit/d37250c9db48d12b6ba66e10a2ccaf8e3e66b547
DIFF: https://github.com/llvm/llvm-project/commit/d37250c9db48d12b6ba66e10a2ccaf8e3e66b547.diff

LOG: [flang][runtime] Fixes for element size calculation.

BytesFor() used to return KIND for the size, which is not always
correct, so I changed it to return the size of the actual CppType
corresponding to the given category and kind.

MinElemLen() used to calculate size incorrectly (e.g. CFI_type_extended_double
was sized 10, whereas it may occupy more bytes on a target), so I changed it
to call BytesFor().

Additional changes were needed to resolve new failures for transformational
intrinsics. These intrinsics used to work for not fully supported
data types (e.g. REAL(3)), but now stopped working because CppType
cannot be computed for those categories/kinds. The solution is to use
known element size from the source argument(s) for establishing
the destination descriptor - the element size is all that is needed
for transformational intrinsics to keep working.

Note that this does not help cases, where runtime still has to
compute the element size, e.g. when it creates descriptors for
components of derived types. If the component has unsupported
data type, BytesFor() will still fail. So these cases require
adding support for the missing types.

New regression unit test in Runtime/Transformational.cpp
demonstrates the case that will start working properly with
this commit.

Added: 
    flang/runtime/ISO_Fortran_util.h

Modified: 
    flang/include/flang/Runtime/descriptor.h
    flang/runtime/ISO_Fortran_binding.cpp
    flang/runtime/descriptor.cpp
    flang/runtime/extrema.cpp
    flang/runtime/reduction-templates.h
    flang/runtime/reduction.cpp
    flang/runtime/type-code.cpp
    flang/unittests/Runtime/Transformational.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h
index 3bae3ef893550..9c6d6de0690e4 100644
--- a/flang/include/flang/Runtime/descriptor.h
+++ b/flang/include/flang/Runtime/descriptor.h
@@ -145,9 +145,10 @@ class Descriptor {
   Descriptor(const Descriptor &);
   Descriptor &operator=(const Descriptor &);
 
-  static constexpr std::size_t BytesFor(TypeCategory category, int kind) {
-    return category == TypeCategory::Complex ? kind * 2 : kind;
-  }
+  // Returns the number of bytes occupied by an element of the given
+  // category and kind including any alignment padding required
+  // between adjacent elements.
+  static std::size_t BytesFor(TypeCategory category, int kind);
 
   void Establish(TypeCode t, std::size_t elementBytes, void *p = nullptr,
       int rank = maxRank, const SubscriptValue *extent = nullptr,

diff  --git a/flang/runtime/ISO_Fortran_binding.cpp b/flang/runtime/ISO_Fortran_binding.cpp
index e4d8cc286cad7..a9e2c3e11eb08 100644
--- a/flang/runtime/ISO_Fortran_binding.cpp
+++ b/flang/runtime/ISO_Fortran_binding.cpp
@@ -10,20 +10,15 @@
 // as specified in section 18.5.5 of Fortran 2018.
 
 #include "flang/ISO_Fortran_binding.h"
+#include "ISO_Fortran_util.h"
+#include "terminator.h"
 #include "flang/Runtime/descriptor.h"
+#include "flang/Runtime/type-code.h"
 #include <cstdlib>
 
 namespace Fortran::ISO {
 extern "C" {
 
-static inline constexpr bool IsCharacterType(CFI_type_t ty) {
-  return ty == CFI_type_char || ty == CFI_type_char16_t ||
-      ty == CFI_type_char32_t;
-}
-static inline constexpr bool IsAssumedSize(const CFI_cdesc_t *dv) {
-  return dv->rank > 0 && dv->dim[dv->rank - 1].extent == -1;
-}
-
 void *CFI_address(
     const CFI_cdesc_t *descriptor, const CFI_index_t subscripts[]) {
   char *p{static_cast<char *>(descriptor->base_addr)};
@@ -106,184 +101,23 @@ int CFI_deallocate(CFI_cdesc_t *descriptor) {
   return CFI_SUCCESS;
 }
 
-static constexpr std::size_t MinElemLen(CFI_type_t type) {
-  std::size_t minElemLen{0};
-  switch (type) {
-  case CFI_type_signed_char:
-    minElemLen = sizeof(signed char);
-    break;
-  case CFI_type_short:
-    minElemLen = sizeof(short);
-    break;
-  case CFI_type_int:
-    minElemLen = sizeof(int);
-    break;
-  case CFI_type_long:
-    minElemLen = sizeof(long);
-    break;
-  case CFI_type_long_long:
-    minElemLen = sizeof(long long);
-    break;
-  case CFI_type_size_t:
-    minElemLen = sizeof(std::size_t);
-    break;
-  case CFI_type_int8_t:
-    minElemLen = sizeof(std::int8_t);
-    break;
-  case CFI_type_int16_t:
-    minElemLen = sizeof(std::int16_t);
-    break;
-  case CFI_type_int32_t:
-    minElemLen = sizeof(std::int32_t);
-    break;
-  case CFI_type_int64_t:
-    minElemLen = sizeof(std::int64_t);
-    break;
-  case CFI_type_int128_t:
-    minElemLen = 2 * sizeof(std::int64_t);
-    break;
-  case CFI_type_int_least8_t:
-    minElemLen = sizeof(std::int_least8_t);
-    break;
-  case CFI_type_int_least16_t:
-    minElemLen = sizeof(std::int_least16_t);
-    break;
-  case CFI_type_int_least32_t:
-    minElemLen = sizeof(std::int_least32_t);
-    break;
-  case CFI_type_int_least64_t:
-    minElemLen = sizeof(std::int_least64_t);
-    break;
-  case CFI_type_int_least128_t:
-    minElemLen = 2 * sizeof(std::int_least64_t);
-    break;
-  case CFI_type_int_fast8_t:
-    minElemLen = sizeof(std::int_fast8_t);
-    break;
-  case CFI_type_int_fast16_t:
-    minElemLen = sizeof(std::int_fast16_t);
-    break;
-  case CFI_type_int_fast32_t:
-    minElemLen = sizeof(std::int_fast32_t);
-    break;
-  case CFI_type_int_fast64_t:
-    minElemLen = sizeof(std::int_fast64_t);
-    break;
-  case CFI_type_intmax_t:
-    minElemLen = sizeof(std::intmax_t);
-    break;
-  case CFI_type_intptr_t:
-    minElemLen = sizeof(std::intptr_t);
-    break;
-  case CFI_type_ptr
diff _t:
-    minElemLen = sizeof(std::ptr
diff _t);
-    break;
-  case CFI_type_half_float:
-    minElemLen = 2;
-    break;
-  case CFI_type_bfloat:
-    minElemLen = 2;
-    break;
-  case CFI_type_float:
-    minElemLen = sizeof(float);
-    break;
-  case CFI_type_double:
-    minElemLen = sizeof(double);
-    break;
-  case CFI_type_extended_double:
-    minElemLen = 10;
-    break;
-  case CFI_type_long_double:
-    minElemLen = sizeof(long double);
-    break;
-  case CFI_type_float128:
-    minElemLen = 16;
-    break;
-  case CFI_type_half_float_Complex:
-    minElemLen = 2 * MinElemLen(CFI_type_half_float);
-    break;
-  case CFI_type_bfloat_Complex:
-    minElemLen = 2 * MinElemLen(CFI_type_bfloat);
-    break;
-  case CFI_type_float_Complex:
-    minElemLen = 2 * sizeof(float);
-    break;
-  case CFI_type_double_Complex:
-    minElemLen = 2 * sizeof(double);
-    break;
-  case CFI_type_extended_double_Complex:
-    minElemLen = 2 * MinElemLen(CFI_type_extended_double);
-    break;
-  case CFI_type_long_double_Complex:
-    minElemLen = 2 * sizeof(long double);
-    break;
-  case CFI_type_float128_Complex:
-    minElemLen = 2 * MinElemLen(CFI_type_float128);
-    break;
-  case CFI_type_Bool:
-    minElemLen = 1;
-    break;
-  case CFI_type_cptr:
-    minElemLen = sizeof(void *);
-    break;
-  case CFI_type_char16_t:
-    minElemLen = sizeof(char16_t);
-    break;
-  case CFI_type_char32_t:
-    minElemLen = sizeof(char32_t);
-    break;
-  }
-  return minElemLen;
-}
-
 int CFI_establish(CFI_cdesc_t *descriptor, void *base_addr,
     CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
     CFI_rank_t rank, const CFI_index_t extents[]) {
-  if (attribute != CFI_attribute_other && attribute != CFI_attribute_pointer &&
-      attribute != CFI_attribute_allocatable) {
-    return CFI_INVALID_ATTRIBUTE;
+  int cfiStatus{VerifyEstablishParameters(descriptor, base_addr, attribute,
+      type, elem_len, rank, extents, /*external=*/true)};
+  if (cfiStatus != CFI_SUCCESS) {
+    return cfiStatus;
   }
-  if (rank > CFI_MAX_RANK) {
-    return CFI_INVALID_RANK;
-  }
-  if (base_addr && attribute == CFI_attribute_allocatable) {
-    return CFI_ERROR_BASE_ADDR_NOT_NULL;
-  }
-  if (rank > 0 && base_addr && !extents) {
-    return CFI_INVALID_EXTENT;
-  }
-  if (type < CFI_type_signed_char || type > CFI_TYPE_LAST) {
-    return CFI_INVALID_TYPE;
-  }
-  if (!descriptor) {
-    return CFI_INVALID_DESCRIPTOR;
-  }
-  if (type == CFI_type_struct || type == CFI_type_other ||
-      IsCharacterType(type)) {
-    if (elem_len <= 0) {
-      return CFI_INVALID_ELEM_LEN;
-    }
-  } else {
+  if (type != CFI_type_struct && type != CFI_type_other &&
+      !IsCharacterType(type)) {
     elem_len = MinElemLen(type);
-    assert(elem_len > 0 && "Unknown element length for type");
   }
-  descriptor->base_addr = base_addr;
-  descriptor->elem_len = elem_len;
-  descriptor->version = CFI_VERSION;
-  descriptor->rank = rank;
-  descriptor->type = type;
-  descriptor->attribute = attribute;
-  descriptor->f18Addendum = 0;
-  std::size_t byteSize{elem_len};
-  constexpr std::size_t lower_bound{0};
-  if (base_addr) {
-    for (std::size_t j{0}; j < rank; ++j) {
-      descriptor->dim[j].lower_bound = lower_bound;
-      descriptor->dim[j].extent = extents[j];
-      descriptor->dim[j].sm = byteSize;
-      byteSize *= extents[j];
-    }
+  if (elem_len <= 0) {
+    return CFI_INVALID_ELEM_LEN;
   }
+  EstablishDescriptor(
+      descriptor, base_addr, attribute, type, elem_len, rank, extents);
   return CFI_SUCCESS;
 }
 

diff  --git a/flang/runtime/ISO_Fortran_util.h b/flang/runtime/ISO_Fortran_util.h
new file mode 100644
index 0000000000000..c829a822eb387
--- /dev/null
+++ b/flang/runtime/ISO_Fortran_util.h
@@ -0,0 +1,102 @@
+//===-- runtime/ISO_Fortran_util.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_ISO_FORTRAN_UTIL_H_
+#define FORTRAN_RUNTIME_ISO_FORTRAN_UTIL_H_
+
+// Internal utils for establishing CFI_cdesc_t descriptors.
+
+#include "terminator.h"
+#include "flang/ISO_Fortran_binding.h"
+#include "flang/Runtime/descriptor.h"
+#include "flang/Runtime/type-code.h"
+#include <cstdlib>
+
+namespace Fortran::ISO {
+static inline constexpr bool IsCharacterType(CFI_type_t ty) {
+  return ty == CFI_type_char || ty == CFI_type_char16_t ||
+      ty == CFI_type_char32_t;
+}
+static inline constexpr bool IsAssumedSize(const CFI_cdesc_t *dv) {
+  return dv->rank > 0 && dv->dim[dv->rank - 1].extent == -1;
+}
+
+static inline std::size_t MinElemLen(CFI_type_t type) {
+  auto typeParams{Fortran::runtime::TypeCode{type}.GetCategoryAndKind()};
+  if (!typeParams) {
+    Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
+    terminator.Crash(
+        "not yet implemented: CFI_type_t=%d", static_cast<int>(type));
+  }
+
+  return Fortran::runtime::Descriptor::BytesFor(
+      typeParams->first, typeParams->second);
+}
+
+static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
+    void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
+    std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[],
+    bool external) {
+  if (attribute != CFI_attribute_other && attribute != CFI_attribute_pointer &&
+      attribute != CFI_attribute_allocatable) {
+    return CFI_INVALID_ATTRIBUTE;
+  }
+  if (rank > CFI_MAX_RANK) {
+    return CFI_INVALID_RANK;
+  }
+  if (base_addr && attribute == CFI_attribute_allocatable) {
+    return CFI_ERROR_BASE_ADDR_NOT_NULL;
+  }
+  if (rank > 0 && base_addr && !extents) {
+    return CFI_INVALID_EXTENT;
+  }
+  if (type < CFI_type_signed_char || type > CFI_TYPE_LAST) {
+    return CFI_INVALID_TYPE;
+  }
+  if (!descriptor) {
+    return CFI_INVALID_DESCRIPTOR;
+  }
+  if (external) {
+    if (type == CFI_type_struct || type == CFI_type_other ||
+        IsCharacterType(type)) {
+      if (elem_len <= 0) {
+        return CFI_INVALID_ELEM_LEN;
+      }
+    }
+  } else {
+    // We do not expect CFI_type_other for internal invocations.
+    if (type == CFI_type_other) {
+      return CFI_INVALID_TYPE;
+    }
+  }
+  return CFI_SUCCESS;
+}
+
+static inline void EstablishDescriptor(CFI_cdesc_t *descriptor, void *base_addr,
+    CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
+    CFI_rank_t rank, const CFI_index_t extents[]) {
+  descriptor->base_addr = base_addr;
+  descriptor->elem_len = elem_len;
+  descriptor->version = CFI_VERSION;
+  descriptor->rank = rank;
+  descriptor->type = type;
+  descriptor->attribute = attribute;
+  descriptor->f18Addendum = 0;
+  std::size_t byteSize{elem_len};
+  constexpr std::size_t lower_bound{0};
+  if (base_addr) {
+    for (std::size_t j{0}; j < rank; ++j) {
+      descriptor->dim[j].lower_bound = lower_bound;
+      descriptor->dim[j].extent = extents[j];
+      descriptor->dim[j].sm = byteSize;
+      byteSize *= extents[j];
+    }
+  }
+}
+} // namespace Fortran::ISO
+#endif // FORTRAN_RUNTIME_ISO_FORTRAN_UTIL_H_

diff  --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp
index 7f608c2336df0..37d6a50414345 100644
--- a/flang/runtime/descriptor.cpp
+++ b/flang/runtime/descriptor.cpp
@@ -7,10 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Runtime/descriptor.h"
+#include "ISO_Fortran_util.h"
 #include "derived.h"
 #include "memory.h"
 #include "stat.h"
 #include "terminator.h"
+#include "tools.h"
 #include "type-info.h"
 #include <cassert>
 #include <cstdlib>
@@ -29,22 +31,19 @@ void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
     int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
     bool addendum) {
   Terminator terminator{__FILE__, __LINE__};
-  // Subtle: the standard CFI_establish() function doesn't allow a zero
-  // elem_len argument in cases where elem_len is not ignored; and when it
-  // returns an error code (CFI_INVALID_ELEM_LEN in this case), it must not
-  // modify the descriptor.  That design makes sense, maybe, for actual
-  // C interoperability, but we need to work around it here.  A zero
-  // incoming element length is replaced by 4 so that it will be valid
-  // for all CHARACTER kinds.
-  std::size_t workaroundElemLen{elementBytes ? elementBytes : 4};
-  int cfiStatus{ISO::CFI_establish(
-      &raw_, p, attribute, t.raw(), workaroundElemLen, rank, extent)};
+  int cfiStatus{ISO::VerifyEstablishParameters(&raw_, p, attribute, t.raw(),
+      elementBytes, rank, extent, /*external=*/false)};
   if (cfiStatus != CFI_SUCCESS) {
     terminator.Crash(
-        "Descriptor::Establish: CFI_establish returned %d", cfiStatus, t.raw());
+        "Descriptor::Establish: CFI_establish returned %d for CFI_type_t(%d)",
+        cfiStatus, t.raw());
   }
+  ISO::EstablishDescriptor(
+      &raw_, p, attribute, t.raw(), elementBytes, rank, extent);
   if (elementBytes == 0) {
     raw_.elem_len = 0;
+    // Reset byte strides of the dimensions, since EstablishDescriptor()
+    // only does that when the base address is not nullptr.
     for (int j{0}; j < rank; ++j) {
       GetDimension(j).SetByteStride(0);
     }
@@ -57,6 +56,20 @@ void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
   }
 }
 
+namespace {
+template <TypeCategory CAT, int KIND> struct TypeSizeGetter {
+  constexpr std::size_t operator()() const {
+    CppTypeFor<CAT, KIND> arr[2];
+    return sizeof arr / 2;
+  }
+};
+} // namespace
+
+std::size_t Descriptor::BytesFor(TypeCategory category, int kind) {
+  Terminator terminator{__FILE__, __LINE__};
+  return ApplyType<TypeSizeGetter, std::size_t>(category, kind, terminator);
+}
+
 void Descriptor::Establish(TypeCategory c, int kind, void *p, int rank,
     const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
     bool addendum) {

diff  --git a/flang/runtime/extrema.cpp b/flang/runtime/extrema.cpp
index 1e7c364c27de7..f96a91c059512 100644
--- a/flang/runtime/extrema.cpp
+++ b/flang/runtime/extrema.cpp
@@ -250,8 +250,12 @@ inline void TypedPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
       maskToUse = nullptr;
     } else {
       // For scalar MASK arguments that are .FALSE., return all zeroes
-      CreatePartialReductionResult(result, x, dim, terminator, intrinsic,
-          TypeCode{TypeCategory::Integer, kind});
+
+      // Element size of the destination descriptor is the size
+      // of {TypeCategory::Integer, kind}.
+      CreatePartialReductionResult(result, x,
+          Descriptor::BytesFor(TypeCategory::Integer, kind), dim, terminator,
+          intrinsic, TypeCode{TypeCategory::Integer, kind});
       std::memset(
           result.OffsetElement(), 0, result.Elements() * result.ElementBytes());
       return;
@@ -360,6 +364,9 @@ static void DoMaxMinNorm2(Descriptor &result, const Descriptor &x, int dim,
   ACCUMULATOR accumulator{x};
   if (dim == 0 || x.rank() == 1) {
     // Total reduction
+
+    // Element size of the destination descriptor is the same
+    // as the element size of the source.
     result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
         CFI_attribute_allocatable);
     if (int stat{result.Allocate()}) {
@@ -370,8 +377,11 @@ static void DoMaxMinNorm2(Descriptor &result, const Descriptor &x, int dim,
     accumulator.GetResult(result.OffsetElement<Type>());
   } else {
     // Partial reduction
-    PartialReduction<ACCUMULATOR, CAT, KIND>(
-        result, x, dim, mask, terminator, intrinsic, accumulator);
+
+    // Element size of the destination descriptor is the same
+    // as the element size of the source.
+    PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim,
+        mask, terminator, intrinsic, accumulator);
   }
 }
 

diff  --git a/flang/runtime/reduction-templates.h b/flang/runtime/reduction-templates.h
index 93e987a04e5cb..2b7b03572ca8e 100644
--- a/flang/runtime/reduction-templates.h
+++ b/flang/runtime/reduction-templates.h
@@ -96,7 +96,7 @@ inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
 // lower bounds other than one.  This utility subroutine creates an
 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
-// elemets of array(j,:,k) can be reduced.
+// elements of array(j,:,k) can be reduced.
 inline void GetExpandedSubscripts(SubscriptValue at[],
     const Descriptor &descriptor, int zeroBasedDim,
     const SubscriptValue from[]) {
@@ -162,8 +162,8 @@ inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
 // Utility: establishes & allocates the result array for a partial
 // reduction (i.e., one with DIM=).
 static void CreatePartialReductionResult(Descriptor &result,
-    const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
-    TypeCode typeCode) {
+    const Descriptor &x, std::size_t resultElementSize, int dim,
+    Terminator &terminator, const char *intrinsic, TypeCode typeCode) {
   int xRank{x.rank()};
   if (dim < 1 || dim > xRank) {
     terminator.Crash(
@@ -177,8 +177,8 @@ static void CreatePartialReductionResult(Descriptor &result,
   for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
     resultExtent[j - 1] = x.GetDimension(j).Extent();
   }
-  result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
-      CFI_attribute_allocatable);
+  result.Establish(typeCode, resultElementSize, nullptr, xRank - 1,
+      resultExtent, CFI_attribute_allocatable);
   for (int j{0}; j + 1 < xRank; ++j) {
     result.GetDimension(j).SetBounds(1, resultExtent[j]);
   }
@@ -191,11 +191,11 @@ static void CreatePartialReductionResult(Descriptor &result,
 // Partial reductions with DIM=
 
 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
-inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
-    const Descriptor *mask, Terminator &terminator, const char *intrinsic,
-    ACCUMULATOR &accumulator) {
-  CreatePartialReductionResult(
-      result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
+inline void PartialReduction(Descriptor &result, const Descriptor &x,
+    std::size_t resultElementSize, int dim, const Descriptor *mask,
+    Terminator &terminator, const char *intrinsic, ACCUMULATOR &accumulator) {
+  CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
+      intrinsic, TypeCode{CAT, KIND});
   SubscriptValue at[maxRank];
   result.GetLowerBounds(at);
   INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
@@ -238,8 +238,10 @@ struct PartialIntegerReductionHelper {
       using Accumulator =
           ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
       Accumulator accumulator{x};
-      PartialReduction<Accumulator, TypeCategory::Integer, KIND>(
-          result, x, dim, mask, terminator, intrinsic, accumulator);
+      // Element size of the destination descriptor is the same
+      // as the element size of the source.
+      PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
+          x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
     }
   };
 };
@@ -263,8 +265,10 @@ struct PartialFloatingReductionHelper {
         const char *intrinsic) const {
       using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
       Accumulator accumulator{x};
-      PartialReduction<Accumulator, CAT, KIND>(
-          result, x, dim, mask, terminator, intrinsic, accumulator);
+      // Element size of the destination descriptor is the same
+      // as the element size of the source.
+      PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
+          mask, terminator, intrinsic, accumulator);
     }
   };
 };
@@ -314,8 +318,11 @@ template <typename ACCUMULATOR> struct PartialLocationHelper {
     void operator()(Descriptor &result, const Descriptor &x, int dim,
         const Descriptor *mask, Terminator &terminator, const char *intrinsic,
         ACCUMULATOR &accumulator) const {
-      PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(
-          result, x, dim, mask, terminator, intrinsic, accumulator);
+      // Element size of the destination descriptor is the size
+      // of {TypeCategory::Integer, KIND}.
+      PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
+          Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
+          terminator, intrinsic, accumulator);
     }
   };
 };

diff  --git a/flang/runtime/reduction.cpp b/flang/runtime/reduction.cpp
index c828f06d52276..e8c2bd3e77e27 100644
--- a/flang/runtime/reduction.cpp
+++ b/flang/runtime/reduction.cpp
@@ -263,7 +263,7 @@ template <LogicalReduction REDUCTION> struct LogicalReduceHelper {
         Terminator &terminator, const char *intrinsic) const {
       // Standard requires result to have same LOGICAL kind as argument.
       CreatePartialReductionResult(
-          result, x, dim, terminator, intrinsic, x.type());
+          result, x, x.ElementBytes(), dim, terminator, intrinsic, x.type());
       SubscriptValue at[maxRank];
       result.GetLowerBounds(at);
       INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
@@ -310,8 +310,11 @@ class CountAccumulator {
 template <int KIND> struct CountDimension {
   void operator()(Descriptor &result, const Descriptor &x, int dim,
       Terminator &terminator) const {
-    CreatePartialReductionResult(result, x, dim, terminator, "COUNT",
-        TypeCode{TypeCategory::Integer, KIND});
+    // Element size of the descriptor descriptor is the size
+    // of {TypeCategory::Integer, KIND}.
+    CreatePartialReductionResult(result, x,
+        Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, terminator,
+        "COUNT", TypeCode{TypeCategory::Integer, KIND});
     SubscriptValue at[maxRank];
     result.GetLowerBounds(at);
     INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);

diff  --git a/flang/runtime/type-code.cpp b/flang/runtime/type-code.cpp
index b082c689ee207..b9ce519dc1494 100644
--- a/flang/runtime/type-code.cpp
+++ b/flang/runtime/type-code.cpp
@@ -113,6 +113,18 @@ TypeCode::TypeCode(TypeCategory f, int kind) {
 std::optional<std::pair<TypeCategory, int>>
 TypeCode::GetCategoryAndKind() const {
   switch (raw_) {
+  case CFI_type_signed_char:
+    return std::make_pair(TypeCategory::Character, sizeof(signed char));
+  case CFI_type_short:
+    return std::make_pair(TypeCategory::Integer, sizeof(short));
+  case CFI_type_int:
+    return std::make_pair(TypeCategory::Integer, sizeof(int));
+  case CFI_type_long:
+    return std::make_pair(TypeCategory::Integer, sizeof(long));
+  case CFI_type_long_long:
+    return std::make_pair(TypeCategory::Integer, sizeof(long long));
+  case CFI_type_size_t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::size_t));
   case CFI_type_int8_t:
     return std::make_pair(TypeCategory::Integer, 1);
   case CFI_type_int16_t:
@@ -123,6 +135,32 @@ TypeCode::GetCategoryAndKind() const {
     return std::make_pair(TypeCategory::Integer, 8);
   case CFI_type_int128_t:
     return std::make_pair(TypeCategory::Integer, 16);
+  case CFI_type_int_least8_t:
+    return std::make_pair(TypeCategory::Logical, 1);
+  case CFI_type_int_least16_t:
+    return std::make_pair(TypeCategory::Logical, 2);
+  case CFI_type_int_least32_t:
+    return std::make_pair(TypeCategory::Logical, 4);
+  case CFI_type_int_least64_t:
+    return std::make_pair(TypeCategory::Logical, 8);
+  case CFI_type_int_least128_t:
+    return std::make_pair(TypeCategory::Integer, 16);
+  case CFI_type_int_fast8_t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::int_fast8_t));
+  case CFI_type_int_fast16_t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::int_fast16_t));
+  case CFI_type_int_fast32_t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::int_fast32_t));
+  case CFI_type_int_fast64_t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::int_fast64_t));
+  case CFI_type_int_fast128_t:
+    return std::make_pair(TypeCategory::Integer, 16);
+  case CFI_type_intmax_t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::intmax_t));
+  case CFI_type_intptr_t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::intptr_t));
+  case CFI_type_ptr
diff _t:
+    return std::make_pair(TypeCategory::Integer, sizeof(std::ptr
diff _t));
   case CFI_type_half_float:
     return std::make_pair(TypeCategory::Real, 2);
   case CFI_type_bfloat:
@@ -151,24 +189,18 @@ TypeCode::GetCategoryAndKind() const {
     return std::make_pair(TypeCategory::Complex, 16);
   case CFI_type_float128_Complex:
     return std::make_pair(TypeCategory::Complex, 16);
+  case CFI_type_Bool:
+    return std::make_pair(TypeCategory::Logical, 1);
   case CFI_type_char:
     return std::make_pair(TypeCategory::Character, 1);
+  case CFI_type_cptr:
+    return std::make_pair(TypeCategory::Integer, sizeof(void *));
+  case CFI_type_struct:
+    return std::make_pair(TypeCategory::Derived, 0);
   case CFI_type_char16_t:
     return std::make_pair(TypeCategory::Character, 2);
   case CFI_type_char32_t:
     return std::make_pair(TypeCategory::Character, 4);
-  case CFI_type_Bool:
-    return std::make_pair(TypeCategory::Logical, 1);
-  case CFI_type_int_least8_t:
-    return std::make_pair(TypeCategory::Logical, 1);
-  case CFI_type_int_least16_t:
-    return std::make_pair(TypeCategory::Logical, 2);
-  case CFI_type_int_least32_t:
-    return std::make_pair(TypeCategory::Logical, 4);
-  case CFI_type_int_least64_t:
-    return std::make_pair(TypeCategory::Logical, 8);
-  case CFI_type_struct:
-    return std::make_pair(TypeCategory::Derived, 0);
   default:
     return std::nullopt;
   }

diff  --git a/flang/unittests/Runtime/Transformational.cpp b/flang/unittests/Runtime/Transformational.cpp
index 6abe3d638cc95..a4672369f7824 100644
--- a/flang/unittests/Runtime/Transformational.cpp
+++ b/flang/unittests/Runtime/Transformational.cpp
@@ -317,3 +317,31 @@ TEST(Transformational, Unpack) {
   }
   result.Destroy();
 }
+
+#if LDBL_MANT_DIG == 64
+// Make sure the destination descriptor is created by the runtime
+// with proper element size, when REAL*10 maps to 'long double'.
+#define Real10CppType long double
+TEST(Transformational, TransposeReal10) {
+  // ARRAY  1 3 5
+  //        2 4 6
+  auto array{MakeArray<TypeCategory::Real, 10>(std::vector<int>{2, 3},
+      std::vector<Real10CppType>{1.0, 2.0, 3.0, 4.0, 5.0, 6.0},
+      sizeof(Real10CppType))};
+  StaticDescriptor<2, true> statDesc;
+  Descriptor &result{statDesc.descriptor()};
+  RTNAME(Transpose)(result, *array, __FILE__, __LINE__);
+  EXPECT_EQ(result.ElementBytes(), sizeof(Real10CppType));
+  EXPECT_EQ(result.type(), array->type());
+  EXPECT_EQ(result.rank(), 2);
+  EXPECT_EQ(result.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(result.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(result.GetDimension(1).LowerBound(), 1);
+  EXPECT_EQ(result.GetDimension(1).Extent(), 2);
+  static Real10CppType expect[6]{1.0, 3.0, 5.0, 2.0, 4.0, 6.0};
+  for (int j{0}; j < 6; ++j) {
+    EXPECT_EQ(*result.ZeroBasedIndexedElement<Real10CppType>(j), expect[j]);
+  }
+  result.Destroy();
+}
+#endif


        


More information about the flang-commits mailing list