[flang-commits] [flang] fde0ea0 - [flang][AddAliasTags] Fix segfault when type contains `fir.boxproc` (#198997)

via flang-commits flang-commits at lists.llvm.org
Thu May 21 10:55:05 PDT 2026


Author: Kareem Ergawy
Date: 2026-05-21T19:55:00+02:00
New Revision: fde0ea05dab19678ee341c6d955cd89b7608c16b

URL: https://github.com/llvm/llvm-project/commit/fde0ea05dab19678ee341c6d955cd89b7608c16b
DIFF: https://github.com/llvm/llvm-project/commit/fde0ea05dab19678ee341c6d955cd89b7608c16b.diff

LOG: [flang][AddAliasTags] Fix segfault when type contains `fir.boxproc` (#198997)

`fir.boxproc` currently has no LLVM representation (its converter
returns `std::nullopt`). When `AddAliasTags` called
`getTypeSizeAndAlignment` on a type containing `fir.boxproc` (e.g. a
sequence of a derived type with procedure pointer components),
`convertRecordType` and `convertSequenceType` would crash trying to
mlir::cast a null type.

For any type that might recursively contain a non-convertible type
(`fir.boxproc` in this case), `TypeConverter` would now propagate an
empty optional `mlir::Type` and emit a debug warning that conversion
failed. This helps us avoid seg faulting expecting that the type or some
part of it were converted correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply at anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply at anthropic.com>

Added: 
    flang/test/Transforms/tbaa-type-converter-boxproc.fir

Modified: 
    flang/include/flang/Optimizer/CodeGen/TypeConverter.h
    flang/lib/Optimizer/CodeGen/TypeConverter.cpp
    flang/lib/Optimizer/Transforms/AddAliasTags.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/CodeGen/TypeConverter.h b/flang/include/flang/Optimizer/CodeGen/TypeConverter.h
index 20270d41b1e9a..2a4b9b2e53464 100644
--- a/flang/include/flang/Optimizer/CodeGen/TypeConverter.h
+++ b/flang/include/flang/Optimizer/CodeGen/TypeConverter.h
@@ -60,9 +60,8 @@ class LLVMTypeConverter : public mlir::LLVMTypeConverter {
   mlir::Type indexType() const;
 
   // fir.type<name(p : TY'...){f : TY...}>  -->  llvm<"%name = { ty... }">
-  std::optional<llvm::LogicalResult>
-  convertRecordType(fir::RecordType derived,
-                    llvm::SmallVectorImpl<mlir::Type> &results, bool isPacked);
+  std::optional<mlir::Type> convertRecordType(fir::RecordType derived,
+                                              bool isPacked);
 
   // Is an extended descriptor needed given the element type of a fir.box type ?
   // Extended descriptors are required for derived types.
@@ -94,7 +93,7 @@ class LLVMTypeConverter : public mlir::LLVMTypeConverter {
   }
 
   // fir.array<c ... :any>  -->  llvm<"[...[c x any]]">
-  mlir::Type convertSequenceType(SequenceType seq) const;
+  std::optional<mlir::Type> convertSequenceType(SequenceType seq) const;
 
   // fir.tdesc<any>  -->  llvm<"i8*">
   // TODO: For now use a void*, however pointer identity is not sufficient for

diff  --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 3c4162c4d8298..2fca4111e0980 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -45,6 +45,24 @@ static mlir::LowerToLLVMOptions MakeLowerOptions(mlir::ModuleOp module) {
   return options;
 }
 
+static bool
+appendConvertedMemberType(mlir::Type mem,
+                          llvm::SmallVectorImpl<mlir::Type> &members,
+                          const LLVMTypeConverter &converter) {
+  mlir::Type memTy;
+  if (auto box = mlir::dyn_cast<fir::BaseBoxType>(mem))
+    memTy = converter.convertBoxTypeAsStruct(box);
+  else
+    memTy = converter.convertType(mem);
+  if (!memTy) {
+    LLVM_DEBUG(llvm::dbgs() << "type conversion failed for aggregate member: "
+                            << mem << "\n");
+    return false;
+  }
+  members.push_back(memTy);
+  return true;
+}
+
 LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
                                      bool forceUnifiedTBAATree,
                                      const mlir::DataLayout &dl)
@@ -97,10 +115,9 @@ LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
   });
   addConversion(
       [&](fir::PointerType pointer) { return convertPointerLike(pointer); });
-  addConversion(
-      [&](fir::RecordType derived, llvm::SmallVectorImpl<mlir::Type> &results) {
-        return convertRecordType(derived, results, derived.isPacked());
-      });
+  addConversion([&](fir::RecordType derived) {
+    return convertRecordType(derived, derived.isPacked());
+  });
   addConversion(
       [&](fir::ReferenceType ref) { return convertPointerLike(ref); });
   addConversion([&](fir::SequenceType sequence) {
@@ -113,17 +130,12 @@ LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
     return mlir::VectorType::get(llvm::ArrayRef<int64_t>(vecTy.getLen()),
                                  convertType(vecTy.getEleTy()));
   });
-  addConversion([&](mlir::TupleType tuple) {
+  addConversion([&](mlir::TupleType tuple) -> std::optional<mlir::Type> {
     LLVM_DEBUG(llvm::dbgs() << "type convert: " << tuple << '\n');
     llvm::SmallVector<mlir::Type> members;
-    for (auto mem : tuple.getTypes()) {
-      // Prevent fir.box from degenerating to a pointer to a descriptor in the
-      // context of a tuple type.
-      if (auto box = mlir::dyn_cast<fir::BaseBoxType>(mem))
-        members.push_back(convertBoxTypeAsStruct(box));
-      else
-        members.push_back(mlir::cast<mlir::Type>(convertType(mem)));
-    }
+    for (auto mem : tuple.getTypes())
+      if (!appendConvertedMemberType(mem, members, *this))
+        return std::nullopt;
     return mlir::LLVM::LLVMStructType::getLiteral(&getContext(), members,
                                                   /*isPacked=*/false);
   });
@@ -150,35 +162,25 @@ mlir::Type LLVMTypeConverter::indexType() const {
 }
 
 // fir.type<name(p : TY'...){f : TY...}>  -->  llvm<"%name = { ty... }">
-std::optional<llvm::LogicalResult>
-LLVMTypeConverter::convertRecordType(fir::RecordType derived,
-                                     llvm::SmallVectorImpl<mlir::Type> &results,
-                                     bool isPacked) {
+std::optional<mlir::Type>
+LLVMTypeConverter::convertRecordType(fir::RecordType derived, bool isPacked) {
   auto name = fir::NameUniquer::dropTypeConversionMarkers(derived.getName());
   auto st = mlir::LLVM::LLVMStructType::getIdentified(&getContext(), name);
 
   auto &callStack = getCurrentThreadRecursiveStack();
-  if (llvm::count(callStack, derived)) {
-    results.push_back(st);
-    return mlir::success();
-  }
+  if (llvm::count(callStack, derived))
+    return st;
   callStack.push_back(derived);
   llvm::scope_exit popConversionCallStack(
       [&callStack]() { callStack.pop_back(); });
 
   llvm::SmallVector<mlir::Type> members;
-  for (auto mem : derived.getTypeList()) {
-    // Prevent fir.box from degenerating to a pointer to a descriptor in the
-    // context of a record type.
-    if (auto box = mlir::dyn_cast<fir::BaseBoxType>(mem.second))
-      members.push_back(convertBoxTypeAsStruct(box));
-    else
-      members.push_back(mlir::cast<mlir::Type>(convertType(mem.second)));
-  }
+  for (auto mem : derived.getTypeList())
+    if (!appendConvertedMemberType(mem.second, members, *this))
+      return std::nullopt;
   if (mlir::failed(st.setBody(members, isPacked)))
-    return mlir::failure();
-  results.push_back(st);
-  return mlir::success();
+    return std::nullopt;
+  return st;
 }
 
 // Is an extended descriptor needed given the element type of a fir.box type ?
@@ -199,6 +201,8 @@ mlir::Type LLVMTypeConverter::convertBoxTypeAsStruct(BaseBoxType box,
   if (auto removeIndirection = fir::dyn_cast_ptrEleTy(ele))
     ele = removeIndirection;
   auto eleTy = convertType(ele);
+  if (!eleTy)
+    return {};
   // base_addr*
   if (mlir::isa<SequenceType>(ele) &&
       mlir::isa<mlir::LLVM::LLVMPointerType>(eleTy))
@@ -293,8 +297,14 @@ mlir::Type LLVMTypeConverter::convertCharType(fir::CharacterType charTy) const {
 }
 
 // fir.array<c ... :any>  -->  llvm<"[...[c x any]]">
-mlir::Type LLVMTypeConverter::convertSequenceType(SequenceType seq) const {
+std::optional<mlir::Type>
+LLVMTypeConverter::convertSequenceType(SequenceType seq) const {
   auto baseTy = convertType(seq.getEleTy());
+  if (!baseTy) {
+    LLVM_DEBUG(llvm::dbgs() << "type conversion failed for sequence element: "
+                            << seq.getEleTy() << "\n");
+    return std::nullopt;
+  }
   if (characterWithDynamicLen(seq.getEleTy()))
     return baseTy;
   auto shape = seq.getShape();

diff  --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 142e4c8d45c9f..57c13848b3a46 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -65,10 +65,12 @@ extern llvm::cl::opt<bool> supportCrayPointers;
 
 namespace {
 
-// Return the size and alignment (in bytes) for the given type.
+// Return the size and alignment (in bytes) for the given type, or std::nullopt
+// if the type cannot be converted.
+//
 // TODO: this must be combined with DebugTypeGenerator::getFieldSizeAndAlign().
 // We'd better move fir::LLVMTypeConverter out of the FIRCodeGen component.
-static std::pair<std::uint64_t, unsigned short>
+static std::optional<std::pair<std::uint64_t, unsigned short>>
 getTypeSizeAndAlignment(mlir::Type type,
                         fir::LLVMTypeConverter &llvmTypeConverter) {
   mlir::Type llvmTy;
@@ -77,6 +79,9 @@ getTypeSizeAndAlignment(mlir::Type type,
   else
     llvmTy = llvmTypeConverter.convertType(type);
 
+  if (!llvmTy)
+    return std::nullopt;
+
   const mlir::DataLayout &dataLayout = llvmTypeConverter.getDataLayout();
   uint64_t byteSize = dataLayout.getTypeSize(llvmTy);
   unsigned short byteAlign = dataLayout.getTypeABIAlignment(llvmTy);
@@ -234,12 +239,15 @@ class PassState {
   // about their physical storages and layouts.
   void collectPhysicalStorageAliasSets(mlir::Operation *op);
 
-  // Return the byte size of the given declaration.
-  std::size_t getDeclarationSize(fir::FortranVariableStorageOpInterface decl) {
+  // Return the byte size of the given declaration, or std::nullopt if the
+  // size could not be computed (e.g. the type contains !fir.boxproc members).
+  std::optional<std::size_t>
+  getDeclarationSize(fir::FortranVariableStorageOpInterface decl) {
     mlir::Type memType = fir::unwrapRefType(decl.getBase().getType());
-    auto [size, alignment] =
-        getTypeSizeAndAlignment(memType, llvmTypeConverter);
-    return llvm::alignTo(size, alignment);
+    auto sizeAndAlign = getTypeSizeAndAlignment(memType, llvmTypeConverter);
+    if (!sizeAndAlign)
+      return std::nullopt;
+    return llvm::alignTo(sizeAndAlign->first, sizeAndAlign->second);
   }
 
   // A StorageDesc specifies an operation that defines a physical storage
@@ -454,30 +462,33 @@ void PassState::collectPhysicalStorageAliasSets(mlir::Operation *op) {
     fir::GlobalOp globalDef =
         getGlobalDefiningOp(addrOfOp.getSymbol().getRootReference());
     std::uint64_t storageOffset = decl.getStorageOffset();
-    std::size_t declSize = getDeclarationSize(decl);
+    std::optional<std::size_t> declSize = getDeclarationSize(decl);
     LLVM_DEBUG(llvm::dbgs()
                << "Found variable with storage:\n"
                << "Declaration: " << decl << "\n"
                << "Storage: " << (globalDef ? globalDef : nullptr) << "\n"
                << "Offset: " << storageOffset << "\n"
-               << "Size: " << declSize << "\n");
+               << "Size: "
+               << (declSize ? llvm::Twine(*declSize)
+                            : llvm::Twine("could not be computed"))
+               << "\n");
     if (!globalDef) {
       seenUnknownStorage = true;
       return mlir::WalkResult::advance();
     }
-    // Zero-sized variables do not need any TBAA tags, because
-    // they cannot be accessed.
-    if (declSize == 0)
+    // Skip variables whose size could not be computed or that are zero-sized,
+    // as they do not need any TBAA tags.
+    if (!declSize || *declSize == 0)
       return mlir::WalkResult::advance();
 
     declToStorageMap.try_emplace(decl.getOperation(), globalDef.getOperation(),
-                                 storageOffset, declSize);
+                                 storageOffset, *declSize);
     storageDecls.try_emplace(globalDef.getOperation())
         .first->second.push_back(decl.getOperation());
 
     auto &set =
         memberIntervals.try_emplace(globalDef.getOperation()).first->second;
-    set.insert(IntervalTy(storageOffset, declSize));
+    set.insert(IntervalTy(storageOffset, *declSize));
     return mlir::WalkResult::advance();
   });
 

diff  --git a/flang/test/Transforms/tbaa-type-converter-boxproc.fir b/flang/test/Transforms/tbaa-type-converter-boxproc.fir
new file mode 100644
index 0000000000000..de9ecb4d67e46
--- /dev/null
+++ b/flang/test/Transforms/tbaa-type-converter-boxproc.fir
@@ -0,0 +1,50 @@
+// Tests that the type converter handles record and sequence types containing
+// !fir.boxproc members by returning std::nullopt instead of crashing.
+
+// RUN: fir-opt --split-input-file --fir-add-alias-tags %s | FileCheck %s
+
+// Record type containing a !fir.boxproc member in COMMON storage.
+// convertRecordType returns std::nullopt for such types.
+module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<[32, 64]> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little">, llvm.data_layout = ""} {
+  fir.global common @g1_(dense<0> : vector<16xi8>) {alignment = 8 : i64} : !fir.array<16xi8>
+  func.func @_QPtest_record() {
+    %c0 = arith.constant 0 : index
+    %addr = fir.address_of(@g1_) : !fir.ref<!fir.array<16xi8>>
+    %0 = fir.coordinate_of %addr, %c0 : (!fir.ref<!fir.array<16xi8>>, index) -> !fir.ref<i8>
+    %1 = fir.convert %0 : (!fir.ref<i8>) -> !fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>
+    %2 = fir.declare %1 storage(%addr[0]) {uniq_name = "_QPtest_recordEv"} : (!fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>, !fir.ref<!fir.array<16xi8>>) -> !fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>
+    %3 = fir.load %2 : !fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>
+    fir.store %3 to %2 : !fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>
+    return
+  }
+}
+// No byte-range TBAA should be generated.
+// CHECK-NOT: bytes_
+// CHECK-LABEL: func.func @_QPtest_record
+// CHECK: return
+
+// -----
+
+// Array of records containing a !fir.boxproc member in COMMON storage.
+// convertSequenceType returns std::nullopt for such types.
+module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, i64 = dense<[32, 64]> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little">, llvm.data_layout = ""} {
+  fir.global common @g2_(dense<0> : vector<160xi8>) {alignment = 8 : i64} : !fir.array<160xi8>
+  func.func @_QPtest_sequence() {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c10 = arith.constant 10 : index
+    %addr = fir.address_of(@g2_) : !fir.ref<!fir.array<160xi8>>
+    %0 = fir.coordinate_of %addr, %c0 : (!fir.ref<!fir.array<160xi8>>, index) -> !fir.ref<i8>
+    %1 = fir.convert %0 : (!fir.ref<i8>) -> !fir.ref<!fir.array<10x!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>>
+    %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+    %2 = fir.declare %1(%shape) storage(%addr[0]) {uniq_name = "_QPtest_sequenceEv"} : (!fir.ref<!fir.array<10x!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>>, !fir.shape<1>, !fir.ref<!fir.array<160xi8>>) -> !fir.ref<!fir.array<10x!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>>
+    %3 = fir.array_coor %2(%shape) %c1 : (!fir.ref<!fir.array<10x!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>>, !fir.shape<1>, index) -> !fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>
+    %4 = fir.load %3 : !fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>
+    fir.store %4 to %3 : !fir.ref<!fir.type<t_rec{proc:!fir.boxproc<()->()>}>>
+    return
+  }
+}
+// No byte-range TBAA should be generated since convertSequenceType returns nullopt.
+// CHECK-NOT: bytes_
+// CHECK-LABEL: func.func @_QPtest_sequence
+// CHECK: return


        


More information about the flang-commits mailing list