[llvm] [LLVM][IR] Add support for address space names in DataLayout (PR #170559)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 8 13:50:10 PST 2025


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/170559

>From 7ba46b85328a3473272c6bcfd26287e97da0bf2e Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 3 Dec 2025 13:31:17 -0800
Subject: [PATCH 1/3] [LLVM][IR] Add support for address space names in
 DataLayout

---
 llvm/include/llvm/IR/DataLayout.h             |  26 +++-
 llvm/lib/AsmParser/LLParser.cpp               |   5 +-
 llvm/lib/IR/AsmWriter.cpp                     |  71 ++++++----
 llvm/lib/IR/DataLayout.cpp                    | 133 +++++++++++++++---
 .../symbolic-addrspace-datalayout.ll          |  52 +++++++
 llvm/unittests/IR/DataLayoutTest.cpp          |  34 ++++-
 6 files changed, 265 insertions(+), 56 deletions(-)
 create mode 100644 llvm/test/Assembler/symbolic-addrspace-datalayout.ll

diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 54458201af0b3..4b42c1f5a78a6 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -21,6 +21,7 @@
 
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -92,7 +93,15 @@ class DataLayout {
     /// of this would be CHERI capabilities where the validity bit is stored
     /// separately from the pointer address+bounds information.
     bool HasExternalState;
-    LLVM_ABI bool operator==(const PointerSpec &Other) const;
+    // Symbolic name of the address space. We can store a StringRef here
+    // directly (backed by StringRepresentation) but then the copy construtor
+    // for DataLayout has to be updated to redirect these StringRefs to the new
+    // copy of StringRepresentation. To avoid that, we store just the offset and
+    // size of the address space name within the StringRepresentation.
+    size_t AddrSpaceNameOffset;
+    size_t AddrSpaceNameSize;
+
+    StringRef getAddrSpaceName(const DataLayout &DL) const;
   };
 
   enum class FunctionPtrAlignType {
@@ -158,7 +167,8 @@ class DataLayout {
   /// Sets or updates the specification for pointer in the given address space.
   void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
                       Align PrefAlign, uint32_t IndexBitWidth,
-                      bool HasUnstableRepr, bool HasExternalState);
+                      bool HasUnstableRepr, bool HasExternalState,
+                      StringRef AddrSpaceName);
 
   /// Internal helper to get alignment for integer of given bitwidth.
   LLVM_ABI Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -173,11 +183,13 @@ class DataLayout {
   Error parseAggregateSpec(StringRef Spec);
 
   /// Attempts to parse pointer specification ('p').
-  Error parsePointerSpec(StringRef Spec);
+  Error parsePointerSpec(StringRef Spec,
+                         SmallDenseSet<StringRef, 8> &AddrSpaceNames);
 
   /// Attempts to parse a single specification.
   Error parseSpecification(StringRef Spec,
-                           SmallVectorImpl<unsigned> &NonIntegralAddressSpaces);
+                           SmallVectorImpl<unsigned> &NonIntegralAddressSpaces,
+                           SmallDenseSet<StringRef, 8> &AddrSpaceNames);
 
   /// Attempts to parse a data layout string.
   Error parseLayoutString(StringRef LayoutString);
@@ -324,9 +336,13 @@ class DataLayout {
     return false;
   }
 
-  /// Layout pointer alignment
+  /// Layout pointer alignment.
   LLVM_ABI Align getPointerABIAlignment(unsigned AS) const;
 
+  LLVM_ABI StringRef getAddressSpaceName(unsigned AS) const;
+
+  LLVM_ABI std::optional<unsigned> getNamedAddressSpace(StringRef Name) const;
+
   /// Return target's alignment for stack-based pointers
   /// FIXME: The defaults need to be removed once all of
   /// the backends/clients are updated.
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 2a0246074a462..a09ab4fc7828c 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1958,13 +1958,16 @@ bool LLParser::parseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS) {
 
   auto ParseAddrspaceValue = [&](unsigned &AddrSpace) -> bool {
     if (Lex.getKind() == lltok::StringConstant) {
-      auto AddrSpaceStr = Lex.getStrVal();
+      const std::string &AddrSpaceStr = Lex.getStrVal();
       if (AddrSpaceStr == "A") {
         AddrSpace = M->getDataLayout().getAllocaAddrSpace();
       } else if (AddrSpaceStr == "G") {
         AddrSpace = M->getDataLayout().getDefaultGlobalsAddressSpace();
       } else if (AddrSpaceStr == "P") {
         AddrSpace = M->getDataLayout().getProgramAddressSpace();
+      } else if (std::optional<unsigned> AS =
+                     M->getDataLayout().getNamedAddressSpace(AddrSpaceStr)) {
+        AddrSpace = *AS;
       } else {
         return tokError("invalid symbolic addrspace '" + AddrSpaceStr + "'");
       }
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 7932765db8359..10577fceee239 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -543,7 +543,8 @@ namespace {
 
 class TypePrinting {
 public:
-  TypePrinting(const Module *M = nullptr) : DeferredM(M) {}
+  TypePrinting(const Module *M = nullptr)
+      : M(M), TypesIncorporated(M == nullptr) {}
 
   TypePrinting(const TypePrinting &) = delete;
   TypePrinting &operator=(const TypePrinting &) = delete;
@@ -563,8 +564,9 @@ class TypePrinting {
 private:
   void incorporateTypes();
 
-  /// A module to process lazily when needed. Set to nullptr as soon as used.
-  const Module *DeferredM;
+  /// A module to process lazily.
+  const Module *M;
+  bool TypesIncorporated;
 
   TypeFinder NamedTypes;
 
@@ -605,11 +607,11 @@ bool TypePrinting::empty() {
 }
 
 void TypePrinting::incorporateTypes() {
-  if (!DeferredM)
+  if (TypesIncorporated)
     return;
 
-  NamedTypes.run(*DeferredM, false);
-  DeferredM = nullptr;
+  NamedTypes.run(*M, false);
+  TypesIncorporated = true;
 
   // The list of struct types we got back includes all the struct types, split
   // the unnamed ones out to a numbering and remove the anonymous structs.
@@ -630,6 +632,20 @@ void TypePrinting::incorporateTypes() {
   NamedTypes.erase(NextToUse, NamedTypes.end());
 }
 
+static void printAddressSpace(const Module *M, unsigned AS, raw_ostream &OS,
+                              StringRef Prefix = " ", StringRef Suffix = "",
+                              bool ForcePrint = false) {
+  if (AS == 0 && !ForcePrint)
+    return;
+  OS << Prefix << "addrspace(";
+  StringRef ASName = M ? M->getDataLayout().getAddressSpaceName(AS) : "";
+  if (!ASName.empty())
+    OS << "\"" << ASName << "\"";
+  else
+    OS << AS;
+  OS << ")" << Suffix;
+}
+
 /// Write the specified type to the specified raw_ostream, making use of type
 /// names or up references to shorten the type name where possible.
 void TypePrinting::print(Type *Ty, raw_ostream &OS) {
@@ -686,8 +702,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
   case Type::PointerTyID: {
     PointerType *PTy = cast<PointerType>(Ty);
     OS << "ptr";
-    if (unsigned AddressSpace = PTy->getAddressSpace())
-      OS << " addrspace(" << AddressSpace << ')';
+    printAddressSpace(M, PTy->getAddressSpace(), OS);
     return;
   }
   case Type::ArrayTyID: {
@@ -3896,10 +3911,10 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
   printThreadLocalModel(GV->getThreadLocalMode(), Out);
   StringRef UA = getUnnamedAddrEncoding(GV->getUnnamedAddr());
   if (!UA.empty())
-      Out << UA << ' ';
+    Out << UA << ' ';
 
-  if (unsigned AddressSpace = GV->getType()->getAddressSpace())
-    Out << "addrspace(" << AddressSpace << ") ";
+  printAddressSpace(GV->getParent(), GV->getType()->getAddressSpace(), Out,
+                    /*Prefix=*/"", /*Suffix=*/" ");
   if (GV->isExternallyInitialized()) Out << "externally_initialized ";
   Out << (GV->isConstant() ? "constant " : "global ");
   TypePrinter.print(GV->getValueType(), Out);
@@ -4174,9 +4189,10 @@ void AssemblyWriter::printFunction(const Function *F) {
   // a module with a non-zero program address space or if there is no valid
   // Module* so that the file can be parsed without the datalayout string.
   const Module *Mod = F->getParent();
-  if (F->getAddressSpace() != 0 || !Mod ||
-      Mod->getDataLayout().getProgramAddressSpace() != 0)
-    Out << " addrspace(" << F->getAddressSpace() << ")";
+  bool ForcePrintAddressSpace =
+      !Mod || Mod->getDataLayout().getProgramAddressSpace() != 0;
+  printAddressSpace(Mod, F->getAddressSpace(), Out, /*Prefix=*/" ",
+                    /*Suffix=*/"", ForcePrintAddressSpace);
   if (Attrs.hasFnAttrs())
     Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttrs());
   if (F->hasSection()) {
@@ -4352,23 +4368,21 @@ void AssemblyWriter::printInfoComment(const Value &V, bool isMaterializable) {
 
 static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I,
                                     raw_ostream &Out) {
-  // We print the address space of the call if it is non-zero.
   if (Operand == nullptr) {
     Out << " <cannot get addrspace!>";
     return;
   }
+
+  // We print the address space of the call if it is non-zero.
+  // We also print it if it is zero but not equal to the program address space
+  // or if we can't find a valid Module* to make it possible to parse
+  // the resulting file even without a datalayout string.
   unsigned CallAddrSpace = Operand->getType()->getPointerAddressSpace();
-  bool PrintAddrSpace = CallAddrSpace != 0;
-  if (!PrintAddrSpace) {
-    const Module *Mod = getModuleFromVal(I);
-    // We also print it if it is zero but not equal to the program address space
-    // or if we can't find a valid Module* to make it possible to parse
-    // the resulting file even without a datalayout string.
-    if (!Mod || Mod->getDataLayout().getProgramAddressSpace() != 0)
-      PrintAddrSpace = true;
-  }
-  if (PrintAddrSpace)
-    Out << " addrspace(" << CallAddrSpace << ")";
+  const Module *Mod = getModuleFromVal(I);
+  bool ForcePrintAddrSpace =
+      !Mod || Mod->getDataLayout().getProgramAddressSpace() != 0;
+  printAddressSpace(Mod, CallAddrSpace, Out, /*Prefix=*/" ", /*Suffix=*/"",
+                    ForcePrintAddrSpace);
 }
 
 // This member is called for each Instruction in a function..
@@ -4735,9 +4749,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       Out << ", align " << A->value();
     }
 
-    unsigned AddrSpace = AI->getAddressSpace();
-    if (AddrSpace != 0)
-      Out << ", addrspace(" << AddrSpace << ')';
+    printAddressSpace(AI->getModule(), AI->getAddressSpace(), Out,
+                      /*Prefix=*/", ");
   } else if (isa<CastInst>(I)) {
     if (Operand) {
       Out << ' ';
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 49e1f898ca594..a48f548524da1 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -147,12 +147,10 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
          PrefAlign == Other.PrefAlign;
 }
 
-bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
-  return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
-         ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
-         IndexBitWidth == Other.IndexBitWidth &&
-         HasUnstableRepresentation == Other.HasUnstableRepresentation &&
-         HasExternalState == Other.HasExternalState;
+StringRef
+DataLayout::PointerSpec::getAddrSpaceName(const DataLayout &DL) const {
+  return StringRef(DL.StringRepresentation)
+      .substr(AddrSpaceNameOffset, AddrSpaceNameSize);
 }
 
 namespace {
@@ -195,7 +193,7 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
 // Default pointer type specifications.
 constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
     // p0:64:64:64:64
-    {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false},
+    {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false, 0, 0},
 };
 
 DataLayout::DataLayout()
@@ -233,6 +231,15 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
 
 bool DataLayout::operator==(const DataLayout &Other) const {
   // NOTE: StringRepresentation might differ, it is not canonicalized.
+  auto IsPointerSpecEqual = [this, &Other](const PointerSpec &A,
+                                           const PointerSpec &B) {
+    return A.AddrSpace == B.AddrSpace && A.BitWidth == B.BitWidth &&
+           A.ABIAlign == B.ABIAlign && A.PrefAlign == B.PrefAlign &&
+           A.IndexBitWidth == B.IndexBitWidth &&
+           A.HasUnstableRepresentation == B.HasUnstableRepresentation &&
+           A.HasExternalState == B.HasExternalState &&
+           A.getAddrSpaceName(*this) == B.getAddrSpaceName(Other);
+  };
   return BigEndian == Other.BigEndian &&
          AllocaAddrSpace == Other.AllocaAddrSpace &&
          ProgramAddrSpace == Other.ProgramAddrSpace &&
@@ -243,9 +250,9 @@ bool DataLayout::operator==(const DataLayout &Other) const {
          ManglingMode == Other.ManglingMode &&
          LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs &&
          FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs &&
-         PointerSpecs == Other.PointerSpecs &&
          StructABIAlignment == Other.StructABIAlignment &&
-         StructPrefAlignment == Other.StructPrefAlignment;
+         StructPrefAlignment == Other.StructPrefAlignment &&
+         llvm::equal(PointerSpecs, Other.PointerSpecs, IsPointerSpecEqual);
 }
 
 Expected<DataLayout> DataLayout::parse(StringRef LayoutString) {
@@ -271,6 +278,48 @@ static Error parseAddrSpace(StringRef Str, unsigned &AddrSpace) {
   return Error::success();
 }
 
+/// Attempts to parse an address space component of a specification, allowing
+/// a name to be specified as well. The input is expected to be of the form
+/// <number> '(' name ')', where the name is optional and the number is
+/// optional as well.
+static Error parseAddrSpaceAndName(StringRef Str, unsigned &AddrSpace,
+                                   StringRef &AddrSpaceName) {
+  if (Str.empty())
+    return createStringError("address space component cannot be empty");
+
+  if (isDigit(Str.front())) {
+    if (Str.consumeInteger(10, AddrSpace) || !isUInt<24>(AddrSpace))
+      return createStringError("address space must be a 24-bit integer");
+  }
+
+  if (Str.empty())
+    return Error::success();
+
+  if (Str.front() != '(')
+    return createStringError("address space must be a 24-bit integer");
+
+  // Expect at least one character in between the ( and ).
+  if (Str.back() != ')' || Str.size() == 2)
+    return createStringError("Expected `( address space name )`");
+
+  AddrSpaceName = Str.drop_front().drop_back();
+  // TODO: Do we need any additional verification of the address space name,
+  // e.g. that it is a valid identifier of some sort? It's not strictly needed.
+
+  // LLVM's assembly parser uses the names "P", "G" and "A" to represent the
+  // program, default global, and alloca address spaces. This mapping is not
+  // 1:1 in the sense that all of them can map to the same numeric address
+  // space. Disallow using these predefined symbolic address space names as
+  // address space names specified in the data layout.
+  if (AddrSpaceName.size() == 1) {
+    char C = AddrSpaceName.front();
+    if (C == 'P' || C == 'G' || C == 'A')
+      return createStringError(
+          "Cannot use predefined address space names P/G/A in data layout");
+  }
+  return Error::success();
+}
+
 /// Attempts to parse a size component of a specification.
 static Error parseSize(StringRef Str, unsigned &BitWidth,
                        StringRef Name = "size") {
@@ -395,7 +444,8 @@ Error DataLayout::parseAggregateSpec(StringRef Spec) {
   return Error::success();
 }
 
-Error DataLayout::parsePointerSpec(StringRef Spec) {
+Error DataLayout::parsePointerSpec(
+    StringRef Spec, SmallDenseSet<StringRef, 8> &AddrSpaceNames) {
   // p[<n>]:<size>:<abi>[:<pref>[:<idx>]]
   SmallVector<StringRef, 5> Components;
   assert(Spec.front() == 'p');
@@ -408,6 +458,7 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
   unsigned AddrSpace = 0;
   bool ExternalState = false;
   bool UnstableRepr = false;
+  StringRef AddrSpaceName;
   StringRef AddrSpaceStr = Components[0];
   while (!AddrSpaceStr.empty()) {
     char C = AddrSpaceStr.front();
@@ -424,12 +475,18 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
     AddrSpaceStr = AddrSpaceStr.drop_front(1);
   }
   if (!AddrSpaceStr.empty())
-    if (Error Err = parseAddrSpace(AddrSpaceStr, AddrSpace))
+    if (Error Err =
+            parseAddrSpaceAndName(AddrSpaceStr, AddrSpace, AddrSpaceName))
       return Err; // Failed to parse the remaining characters as a number
   if (AddrSpace == 0 && (ExternalState || UnstableRepr))
     return createStringError(
         "address space 0 cannot be unstable or have external state");
 
+  // Check for duplicate address space names.
+  if (!AddrSpaceName.empty() && !AddrSpaceNames.insert(AddrSpaceName).second)
+    return createStringError("address space name `" + AddrSpaceName +
+                             "` already used");
+
   // Size. Required, cannot be zero.
   unsigned BitWidth;
   if (Error Err = parseSize(Components[1], BitWidth, "pointer size"))
@@ -462,12 +519,13 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
         "index size cannot be larger than the pointer size");
 
   setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth,
-                 UnstableRepr, ExternalState);
+                 UnstableRepr, ExternalState, AddrSpaceName);
   return Error::success();
 }
 
 Error DataLayout::parseSpecification(
-    StringRef Spec, SmallVectorImpl<unsigned> &NonIntegralAddressSpaces) {
+    StringRef Spec, SmallVectorImpl<unsigned> &NonIntegralAddressSpaces,
+    SmallDenseSet<StringRef, 8> &AddrSpaceNames) {
   // The "ni" specifier is the only two-character specifier. Handle it first.
   if (Spec.starts_with("ni")) {
     // ni:<address space>[:<address space>]...
@@ -499,7 +557,7 @@ Error DataLayout::parseSpecification(
     return parseAggregateSpec(Spec);
 
   if (Specifier == 'p')
-    return parsePointerSpec(Spec);
+    return parsePointerSpec(Spec, AddrSpaceNames);
 
   StringRef Rest = Spec.drop_front();
   switch (Specifier) {
@@ -616,7 +674,7 @@ Error DataLayout::parseSpecification(
 }
 
 Error DataLayout::parseLayoutString(StringRef LayoutString) {
-  StringRepresentation = std::string(LayoutString);
+  StringRepresentation = LayoutString.str();
 
   if (LayoutString.empty())
     return Error::success();
@@ -624,10 +682,12 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
   // Split the data layout string into specifications separated by '-' and
   // parse each specification individually, updating internal data structures.
   SmallVector<unsigned, 8> NonIntegralAddressSpaces;
-  for (StringRef Spec : split(LayoutString, '-')) {
+  SmallDenseSet<StringRef, 8> AddessSpaceNames;
+  for (StringRef Spec : split(StringRepresentation, '-')) {
     if (Spec.empty())
       return createStringError("empty specification is not allowed");
-    if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces))
+    if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces,
+                                       AddessSpaceNames))
       return Err;
   }
   // Mark all address spaces that were qualified as non-integral now. This has
@@ -638,7 +698,8 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
     // the spec for AS0, and we then update that to mark it non-integral.
     const PointerSpec &PS = getPointerSpec(AS);
     setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth,
-                   /*HasUnstableRepr=*/true, /*HasExternalState=*/false);
+                   /*HasUnstableRepr=*/true, /*HasExternalState=*/false,
+                   getAddressSpaceName(AS));
   }
 
   return Error::success();
@@ -687,12 +748,28 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const {
 void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
                                 Align ABIAlign, Align PrefAlign,
                                 uint32_t IndexBitWidth, bool HasUnstableRepr,
-                                bool HasExternalState) {
+                                bool HasExternalState,
+                                StringRef AddrSpaceName) {
   auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
+  size_t AddrSpaceNameOffset = 0, AddrSpaceNameSize = AddrSpaceName.size();
+  if (!AddrSpaceName.empty()) {
+    // Validate that AddrSpaceName points to data within the
+    // StringRepresentation.
+    const char *RepStart = StringRepresentation.data();
+    const char *ASStart = AddrSpaceName.data();
+    [[maybe_unused]] const char *RepEnd =
+        RepStart + StringRepresentation.size();
+    [[maybe_unused]] const char *ASEnd = ASStart + AddrSpaceNameSize;
+
+    assert(RepStart <= ASStart && ASStart < RepEnd && RepStart < ASEnd &&
+           ASEnd <= RepEnd);
+    AddrSpaceNameOffset = std::distance(RepStart, ASStart);
+  }
   if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
     PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
                                        IndexBitWidth, HasUnstableRepr,
-                                       HasExternalState});
+                                       HasExternalState, AddrSpaceNameOffset,
+                                       AddrSpaceNameSize});
   } else {
     I->BitWidth = BitWidth;
     I->ABIAlign = ABIAlign;
@@ -700,6 +777,8 @@ void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
     I->IndexBitWidth = IndexBitWidth;
     I->HasUnstableRepresentation = HasUnstableRepr;
     I->HasExternalState = HasExternalState;
+    I->AddrSpaceNameOffset = AddrSpaceNameOffset;
+    I->AddrSpaceNameSize = AddrSpaceNameSize;
   }
 }
 
@@ -747,6 +826,20 @@ Align DataLayout::getPointerABIAlignment(unsigned AS) const {
   return getPointerSpec(AS).ABIAlign;
 }
 
+StringRef DataLayout::getAddressSpaceName(unsigned AS) const {
+  const PointerSpec &PS = getPointerSpec(AS);
+  return PS.getAddrSpaceName(*this);
+}
+
+std::optional<unsigned> DataLayout::getNamedAddressSpace(StringRef Name) const {
+  auto II = llvm::find_if(PointerSpecs, [Name, this](const PointerSpec &PS) {
+    return PS.getAddrSpaceName(*this) == Name;
+  });
+  if (II != PointerSpecs.end())
+    return II->AddrSpace;
+  return std::nullopt;
+}
+
 Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
   return getPointerSpec(AS).PrefAlign;
 }
diff --git a/llvm/test/Assembler/symbolic-addrspace-datalayout.ll b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
new file mode 100644
index 0000000000000..c7473b2662398
--- /dev/null
+++ b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
@@ -0,0 +1,52 @@
+;; Check support for printing and parsing of address space names specified in
+;; the datalayout.
+; RUN: split-file %s %t --leading-lines
+; RUN: llvm-as < %t/num-to-sym.ll | llvm-dis | FileCheck %t/num-to-sym.ll
+; RUN: llvm-as < %t/sym-to-sym.ll | llvm-dis | FileCheck %t/sym-to-sym.ll
+; RUN: not llvm-as < %t/invalid-name.ll 2>&1 | FileCheck %t/invalid-name.ll
+
+;--- num-to-sym.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+
+; CHECK: @str = private addrspace("global") constant [4 x i8] c"str\00"
+@str = private addrspace(2) constant [4 x i8] c"str\00"
+
+define void @foo() {
+  ; CHECK: %alloca = alloca i32, align 4, addrspace("stack")
+  %alloca = alloca i32, addrspace(8)
+  ret void
+}
+
+; CHECK: define void @bar() addrspace("code")
+define void @bar() addrspace(11) {
+  ; CHECK: call addrspace("code") void @foo()
+  call addrspace(11) void @foo()
+  ret void
+}
+
+;--- sym-to-sym.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+
+; CHECK: @str = private addrspace("global") constant [4 x i8] c"str\00"
+@str = private addrspace("global") constant [4 x i8] c"str\00"
+
+define void @foo() {
+  ; CHECK: %alloca = alloca i32, align 4, addrspace("stack")
+  %alloca = alloca i32, addrspace("stack")
+  ret void
+}
+
+; CHECK: define void @bar() addrspace("code")
+define void @bar() addrspace(11) {
+  ; CHECK: call addrspace("code") void @foo()
+  call addrspace("code") void @foo()
+  ret void
+}
+
+;--- invalid-name.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: error: invalid symbolic addrspace 'global3'
+@str = private addrspace("global3") constant [4 x i8] c"str\00"
+
diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp
index 9ca88141ca0eb..5bef6fcd20126 100644
--- a/llvm/unittests/IR/DataLayoutTest.cpp
+++ b/llvm/unittests/IR/DataLayoutTest.cpp
@@ -309,7 +309,8 @@ TEST(DataLayoutTest, ParseAggregateSpec) {
 TEST(DataLayout, ParsePointerSpec) {
   for (StringRef Str :
        {"p:16:8", "p:16:16:64", "p:32:64:64:32", "p0:32:64", "p42:64:32:32",
-        "p16777215:32:32:64:8", "p16777215:16777215:32768:32768:16777215"})
+        "p1(global):16:8", "p(generic):32:8", "p16777215:32:32:64:8",
+        "p16777215:16777215:32768:32768:16777215"})
     EXPECT_THAT_EXPECTED(DataLayout::parse(Str), Succeeded());
 
   for (StringRef Str :
@@ -422,6 +423,17 @@ TEST(DataLayout, ParsePointerSpec) {
         DataLayout::parse(Str),
         FailedWithMessage(
             "address space 0 cannot be unstable or have external state"));
+
+  // Duplicate address space names not allowed.
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p1(foo):32:32-p2(foo):32:32"),
+      FailedWithMessage("address space name `foo` already used"));
+
+  // Predefined address space names not allowed.
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p1(A):32:32"),
+      FailedWithMessage(
+          "Cannot use predefined address space names P/G/A in data layout"));
 }
 
 TEST(DataLayoutTest, ParseNativeIntegersSpec) {
@@ -576,6 +588,15 @@ TEST(DataLayout, GetPointerPrefAlignment) {
   }
 }
 
+TEST(DataLayout, AddressSpaceName) {
+  DataLayout DL =
+      cantFail(DataLayout::parse("p:16:32-p1(foo):16:32-p10(bar):16:16"));
+  EXPECT_EQ(DL.getAddressSpaceName(0), "");
+  EXPECT_EQ(DL.getAddressSpaceName(1), "foo");
+  EXPECT_EQ(DL.getAddressSpaceName(10), "bar");
+  EXPECT_EQ(DL.getAddressSpaceName(3), "");
+}
+
 TEST(DataLayout, IsNonIntegralAddressSpace) {
   const DataLayout Default;
   EXPECT_THAT(Default.getNonStandardAddressSpaces(), ::testing::SizeIs(0));
@@ -807,4 +828,15 @@ TEST(DataLayoutTest, VectorAlign) {
   EXPECT_EQ(Align(4 * 8), DL->getPrefTypeAlign(V8F32Ty));
 }
 
+TEST(DataLayoutTest, Equality) {
+  const char *Layout0 = "p00(global):32:8";
+  const char *Layout1 = "p0(global):32:8";
+  DataLayout DL0 = cantFail(DataLayout::parse(Layout0));
+  DataLayout DL1 = cantFail(DataLayout::parse(Layout1));
+
+  EXPECT_EQ(DL0.getStringRepresentation(), Layout0);
+  EXPECT_EQ(DL1.getStringRepresentation(), Layout1);
+  EXPECT_EQ(DL0, DL1);
+}
+
 } // anonymous namespace

>From 36f457e3f61c55226d95cf2ad8e35ed83538b1c4 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 4 Dec 2025 10:43:12 -0800
Subject: [PATCH 2/3] Review feedback: use std::string for AddrSpaceName

---
 llvm/include/llvm/IR/DataLayout.h | 13 ++-----
 llvm/lib/IR/DataLayout.cpp        | 64 +++++++++----------------------
 2 files changed, 23 insertions(+), 54 deletions(-)

diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 4b42c1f5a78a6..4d695e1e5c566 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -93,15 +93,10 @@ class DataLayout {
     /// of this would be CHERI capabilities where the validity bit is stored
     /// separately from the pointer address+bounds information.
     bool HasExternalState;
-    // Symbolic name of the address space. We can store a StringRef here
-    // directly (backed by StringRepresentation) but then the copy construtor
-    // for DataLayout has to be updated to redirect these StringRefs to the new
-    // copy of StringRepresentation. To avoid that, we store just the offset and
-    // size of the address space name within the StringRepresentation.
-    size_t AddrSpaceNameOffset;
-    size_t AddrSpaceNameSize;
-
-    StringRef getAddrSpaceName(const DataLayout &DL) const;
+    // Symbolic name of the address space.
+    std::string AddrSpaceName;
+
+    LLVM_ABI bool operator==(const PointerSpec &Other) const;
   };
 
   enum class FunctionPtrAlignType {
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index a48f548524da1..05aa6c29c0e50 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -147,10 +147,13 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
          PrefAlign == Other.PrefAlign;
 }
 
-StringRef
-DataLayout::PointerSpec::getAddrSpaceName(const DataLayout &DL) const {
-  return StringRef(DL.StringRepresentation)
-      .substr(AddrSpaceNameOffset, AddrSpaceNameSize);
+bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
+  return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
+         ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
+         IndexBitWidth == Other.IndexBitWidth &&
+         HasUnstableRepresentation == Other.HasUnstableRepresentation &&
+         HasExternalState == Other.HasExternalState &&
+         AddrSpaceName == Other.AddrSpaceName;
 }
 
 namespace {
@@ -190,17 +193,14 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
     {128, Align::Constant<16>(), Align::Constant<16>()}, // v128:128:128
 };
 
-// Default pointer type specifications.
-constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
-    // p0:64:64:64:64
-    {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false, 0, 0},
-};
-
 DataLayout::DataLayout()
     : IntSpecs(ArrayRef(DefaultIntSpecs)),
       FloatSpecs(ArrayRef(DefaultFloatSpecs)),
-      VectorSpecs(ArrayRef(DefaultVectorSpecs)),
-      PointerSpecs(ArrayRef(DefaultPointerSpecs)) {}
+      VectorSpecs(ArrayRef(DefaultVectorSpecs)) {
+  // Default pointer type specifications.
+  setPointerSpec(0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false,
+                 false, "");
+}
 
 DataLayout::DataLayout(StringRef LayoutString) : DataLayout() {
   if (Error Err = parseLayoutString(LayoutString))
@@ -231,15 +231,6 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
 
 bool DataLayout::operator==(const DataLayout &Other) const {
   // NOTE: StringRepresentation might differ, it is not canonicalized.
-  auto IsPointerSpecEqual = [this, &Other](const PointerSpec &A,
-                                           const PointerSpec &B) {
-    return A.AddrSpace == B.AddrSpace && A.BitWidth == B.BitWidth &&
-           A.ABIAlign == B.ABIAlign && A.PrefAlign == B.PrefAlign &&
-           A.IndexBitWidth == B.IndexBitWidth &&
-           A.HasUnstableRepresentation == B.HasUnstableRepresentation &&
-           A.HasExternalState == B.HasExternalState &&
-           A.getAddrSpaceName(*this) == B.getAddrSpaceName(Other);
-  };
   return BigEndian == Other.BigEndian &&
          AllocaAddrSpace == Other.AllocaAddrSpace &&
          ProgramAddrSpace == Other.ProgramAddrSpace &&
@@ -250,9 +241,9 @@ bool DataLayout::operator==(const DataLayout &Other) const {
          ManglingMode == Other.ManglingMode &&
          LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs &&
          FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs &&
+         PointerSpecs == Other.PointerSpecs &&
          StructABIAlignment == Other.StructABIAlignment &&
-         StructPrefAlignment == Other.StructPrefAlignment &&
-         llvm::equal(PointerSpecs, Other.PointerSpecs, IsPointerSpecEqual);
+         StructPrefAlignment == Other.StructPrefAlignment;
 }
 
 Expected<DataLayout> DataLayout::parse(StringRef LayoutString) {
@@ -751,25 +742,10 @@ void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
                                 bool HasExternalState,
                                 StringRef AddrSpaceName) {
   auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
-  size_t AddrSpaceNameOffset = 0, AddrSpaceNameSize = AddrSpaceName.size();
-  if (!AddrSpaceName.empty()) {
-    // Validate that AddrSpaceName points to data within the
-    // StringRepresentation.
-    const char *RepStart = StringRepresentation.data();
-    const char *ASStart = AddrSpaceName.data();
-    [[maybe_unused]] const char *RepEnd =
-        RepStart + StringRepresentation.size();
-    [[maybe_unused]] const char *ASEnd = ASStart + AddrSpaceNameSize;
-
-    assert(RepStart <= ASStart && ASStart < RepEnd && RepStart < ASEnd &&
-           ASEnd <= RepEnd);
-    AddrSpaceNameOffset = std::distance(RepStart, ASStart);
-  }
   if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
     PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
                                        IndexBitWidth, HasUnstableRepr,
-                                       HasExternalState, AddrSpaceNameOffset,
-                                       AddrSpaceNameSize});
+                                       HasExternalState, AddrSpaceName.str()});
   } else {
     I->BitWidth = BitWidth;
     I->ABIAlign = ABIAlign;
@@ -777,8 +753,7 @@ void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
     I->IndexBitWidth = IndexBitWidth;
     I->HasUnstableRepresentation = HasUnstableRepr;
     I->HasExternalState = HasExternalState;
-    I->AddrSpaceNameOffset = AddrSpaceNameOffset;
-    I->AddrSpaceNameSize = AddrSpaceNameSize;
+    I->AddrSpaceName = AddrSpaceName.str();
   }
 }
 
@@ -827,13 +802,12 @@ Align DataLayout::getPointerABIAlignment(unsigned AS) const {
 }
 
 StringRef DataLayout::getAddressSpaceName(unsigned AS) const {
-  const PointerSpec &PS = getPointerSpec(AS);
-  return PS.getAddrSpaceName(*this);
+  return getPointerSpec(AS).AddrSpaceName;
 }
 
 std::optional<unsigned> DataLayout::getNamedAddressSpace(StringRef Name) const {
-  auto II = llvm::find_if(PointerSpecs, [Name, this](const PointerSpec &PS) {
-    return PS.getAddrSpaceName(*this) == Name;
+  auto II = llvm::find_if(PointerSpecs, [Name](const PointerSpec &PS) {
+    return PS.AddrSpaceName == Name;
   });
   if (II != PointerSpecs.end())
     return II->AddrSpace;

>From dc036b74988a6503a970d2205803bd770a8b7b40 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Mon, 8 Dec 2025 13:49:14 -0800
Subject: [PATCH 3/3] Update documentation and add option to print address
 space names

---
 llvm/docs/LangRef.rst                         |  8 ++++--
 llvm/lib/IR/AsmWriter.cpp                     |  9 ++++++-
 .../symbolic-addrspace-datalayout.ll          | 25 +++++++++++++++++--
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 02865f8a29c67..840537f457cc2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3331,7 +3331,7 @@ as follows:
 ``A<address space>``
     Specifies the address space of objects created by '``alloca``'.
     Defaults to the default address space of 0.
-``p[<flags>][<as>]:<size>:<abi>[:<pref>[:<idx>]]``
+``p[<flags>][<as>][(<name>)]:<size>:<abi>[:<pref>[:<idx>]]``
     This specifies the properties of a pointer in address space ``as``.
     The ``<size>`` parameter specifies the size of the bitwise representation.
     For :ref:`non-integral pointers <nointptrtype>` the representation size may
@@ -3350,7 +3350,11 @@ as follows:
     The optional ``<flags>`` are used to specify properties of pointers in this
     address space: the character ``u`` marks pointers as having an unstable
     representation, and ``e`` marks pointers having external state. See
-    :ref:`Non-Integral Pointer Types <nointptrtype>`.
+    :ref:`Non-Integral Pointer Types <nointptrtype>`. The ``<name>`` is an
+    optional name of that address space, surrounded by ``(`` and ``)``. If the
+    name is specified, it must be unique to that address space and cannot be
+    ``A``, ``G``, or ``P``, which are predefined names used to denote the
+    alloca, global, and program address spaces, respectively.
 
 ``i<size>:<abi>[:<pref>]``
     This specifies the alignment for an integer type of a given bit
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 10577fceee239..a2ac113877179 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -107,6 +107,11 @@ static cl::opt<bool> PreserveAssemblyUseListOrder(
     "preserve-ll-uselistorder", cl::Hidden, cl::init(false),
     cl::desc("Preserve use-list order when writing LLVM assembly."));
 
+static cl::opt<bool>
+    PrintSymbolicAddressSpace("print-sym-addr-space", cl::Hidden,
+                              cl::init(false),
+                              cl::desc("Print symbolic address space names"));
+
 // Make virtual table appear in this compilation unit.
 AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default;
 
@@ -638,7 +643,9 @@ static void printAddressSpace(const Module *M, unsigned AS, raw_ostream &OS,
   if (AS == 0 && !ForcePrint)
     return;
   OS << Prefix << "addrspace(";
-  StringRef ASName = M ? M->getDataLayout().getAddressSpaceName(AS) : "";
+  StringRef ASName = PrintSymbolicAddressSpace && M
+                         ? M->getDataLayout().getAddressSpaceName(AS)
+                         : "";
   if (!ASName.empty())
     OS << "\"" << ASName << "\"";
   else
diff --git a/llvm/test/Assembler/symbolic-addrspace-datalayout.ll b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
index c7473b2662398..683df47adb5f0 100644
--- a/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
+++ b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
@@ -1,8 +1,9 @@
 ;; Check support for printing and parsing of address space names specified in
 ;; the datalayout.
 ; RUN: split-file %s %t --leading-lines
-; RUN: llvm-as < %t/num-to-sym.ll | llvm-dis | FileCheck %t/num-to-sym.ll
-; RUN: llvm-as < %t/sym-to-sym.ll | llvm-dis | FileCheck %t/sym-to-sym.ll
+; RUN: llvm-as < %t/num-to-sym.ll | llvm-dis --print-sym-addr-space=true  | FileCheck %t/num-to-sym.ll
+; RUN: llvm-as < %t/sym-to-sym.ll | llvm-dis --print-sym-addr-space=true  | FileCheck %t/sym-to-sym.ll
+; RUN: llvm-as < %t/sym-to-num.ll | llvm-dis --print-sym-addr-space=false | FileCheck %t/sym-to-num.ll
 ; RUN: not llvm-as < %t/invalid-name.ll 2>&1 | FileCheck %t/invalid-name.ll
 
 ;--- num-to-sym.ll
@@ -45,6 +46,26 @@ define void @bar() addrspace(11) {
   ret void
 }
 
+;--- sym-to-num.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+
+; CHECK: @str = private addrspace(2) constant [4 x i8] c"str\00"
+@str = private addrspace("global") constant [4 x i8] c"str\00"
+
+define void @foo() {
+  ; CHECK: %alloca = alloca i32, align 4, addrspace(8)
+  %alloca = alloca i32, addrspace("stack")
+  ret void
+}
+
+; CHECK: define void @bar() addrspace(11)
+define void @bar() addrspace(11) {
+  ; CHECK: call addrspace(11) void @foo()
+  call addrspace("code") void @foo()
+  ret void
+}
+
 ;--- invalid-name.ll
 target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
 ; CHECK: error: invalid symbolic addrspace 'global3'



More information about the llvm-commits mailing list