[llvm] [LLVM][IR] Add support for address space names in DataLayout (PR #170559)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 13:50:10 PST 2025
https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/170559
>From 7ba46b85328a3473272c6bcfd26287e97da0bf2e Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 3 Dec 2025 13:31:17 -0800
Subject: [PATCH 1/3] [LLVM][IR] Add support for address space names in
DataLayout
---
llvm/include/llvm/IR/DataLayout.h | 26 +++-
llvm/lib/AsmParser/LLParser.cpp | 5 +-
llvm/lib/IR/AsmWriter.cpp | 71 ++++++----
llvm/lib/IR/DataLayout.cpp | 133 +++++++++++++++---
.../symbolic-addrspace-datalayout.ll | 52 +++++++
llvm/unittests/IR/DataLayoutTest.cpp | 34 ++++-
6 files changed, 265 insertions(+), 56 deletions(-)
create mode 100644 llvm/test/Assembler/symbolic-addrspace-datalayout.ll
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 54458201af0b3..4b42c1f5a78a6 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -92,7 +93,15 @@ class DataLayout {
/// of this would be CHERI capabilities where the validity bit is stored
/// separately from the pointer address+bounds information.
bool HasExternalState;
- LLVM_ABI bool operator==(const PointerSpec &Other) const;
+ // Symbolic name of the address space. We can store a StringRef here
+ // directly (backed by StringRepresentation) but then the copy construtor
+ // for DataLayout has to be updated to redirect these StringRefs to the new
+ // copy of StringRepresentation. To avoid that, we store just the offset and
+ // size of the address space name within the StringRepresentation.
+ size_t AddrSpaceNameOffset;
+ size_t AddrSpaceNameSize;
+
+ StringRef getAddrSpaceName(const DataLayout &DL) const;
};
enum class FunctionPtrAlignType {
@@ -158,7 +167,8 @@ class DataLayout {
/// Sets or updates the specification for pointer in the given address space.
void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
Align PrefAlign, uint32_t IndexBitWidth,
- bool HasUnstableRepr, bool HasExternalState);
+ bool HasUnstableRepr, bool HasExternalState,
+ StringRef AddrSpaceName);
/// Internal helper to get alignment for integer of given bitwidth.
LLVM_ABI Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -173,11 +183,13 @@ class DataLayout {
Error parseAggregateSpec(StringRef Spec);
/// Attempts to parse pointer specification ('p').
- Error parsePointerSpec(StringRef Spec);
+ Error parsePointerSpec(StringRef Spec,
+ SmallDenseSet<StringRef, 8> &AddrSpaceNames);
/// Attempts to parse a single specification.
Error parseSpecification(StringRef Spec,
- SmallVectorImpl<unsigned> &NonIntegralAddressSpaces);
+ SmallVectorImpl<unsigned> &NonIntegralAddressSpaces,
+ SmallDenseSet<StringRef, 8> &AddrSpaceNames);
/// Attempts to parse a data layout string.
Error parseLayoutString(StringRef LayoutString);
@@ -324,9 +336,13 @@ class DataLayout {
return false;
}
- /// Layout pointer alignment
+ /// Layout pointer alignment.
LLVM_ABI Align getPointerABIAlignment(unsigned AS) const;
+ LLVM_ABI StringRef getAddressSpaceName(unsigned AS) const;
+
+ LLVM_ABI std::optional<unsigned> getNamedAddressSpace(StringRef Name) const;
+
/// Return target's alignment for stack-based pointers
/// FIXME: The defaults need to be removed once all of
/// the backends/clients are updated.
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 2a0246074a462..a09ab4fc7828c 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1958,13 +1958,16 @@ bool LLParser::parseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS) {
auto ParseAddrspaceValue = [&](unsigned &AddrSpace) -> bool {
if (Lex.getKind() == lltok::StringConstant) {
- auto AddrSpaceStr = Lex.getStrVal();
+ const std::string &AddrSpaceStr = Lex.getStrVal();
if (AddrSpaceStr == "A") {
AddrSpace = M->getDataLayout().getAllocaAddrSpace();
} else if (AddrSpaceStr == "G") {
AddrSpace = M->getDataLayout().getDefaultGlobalsAddressSpace();
} else if (AddrSpaceStr == "P") {
AddrSpace = M->getDataLayout().getProgramAddressSpace();
+ } else if (std::optional<unsigned> AS =
+ M->getDataLayout().getNamedAddressSpace(AddrSpaceStr)) {
+ AddrSpace = *AS;
} else {
return tokError("invalid symbolic addrspace '" + AddrSpaceStr + "'");
}
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 7932765db8359..10577fceee239 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -543,7 +543,8 @@ namespace {
class TypePrinting {
public:
- TypePrinting(const Module *M = nullptr) : DeferredM(M) {}
+ TypePrinting(const Module *M = nullptr)
+ : M(M), TypesIncorporated(M == nullptr) {}
TypePrinting(const TypePrinting &) = delete;
TypePrinting &operator=(const TypePrinting &) = delete;
@@ -563,8 +564,9 @@ class TypePrinting {
private:
void incorporateTypes();
- /// A module to process lazily when needed. Set to nullptr as soon as used.
- const Module *DeferredM;
+ /// A module to process lazily.
+ const Module *M;
+ bool TypesIncorporated;
TypeFinder NamedTypes;
@@ -605,11 +607,11 @@ bool TypePrinting::empty() {
}
void TypePrinting::incorporateTypes() {
- if (!DeferredM)
+ if (TypesIncorporated)
return;
- NamedTypes.run(*DeferredM, false);
- DeferredM = nullptr;
+ NamedTypes.run(*M, false);
+ TypesIncorporated = true;
// The list of struct types we got back includes all the struct types, split
// the unnamed ones out to a numbering and remove the anonymous structs.
@@ -630,6 +632,20 @@ void TypePrinting::incorporateTypes() {
NamedTypes.erase(NextToUse, NamedTypes.end());
}
+static void printAddressSpace(const Module *M, unsigned AS, raw_ostream &OS,
+ StringRef Prefix = " ", StringRef Suffix = "",
+ bool ForcePrint = false) {
+ if (AS == 0 && !ForcePrint)
+ return;
+ OS << Prefix << "addrspace(";
+ StringRef ASName = M ? M->getDataLayout().getAddressSpaceName(AS) : "";
+ if (!ASName.empty())
+ OS << "\"" << ASName << "\"";
+ else
+ OS << AS;
+ OS << ")" << Suffix;
+}
+
/// Write the specified type to the specified raw_ostream, making use of type
/// names or up references to shorten the type name where possible.
void TypePrinting::print(Type *Ty, raw_ostream &OS) {
@@ -686,8 +702,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
case Type::PointerTyID: {
PointerType *PTy = cast<PointerType>(Ty);
OS << "ptr";
- if (unsigned AddressSpace = PTy->getAddressSpace())
- OS << " addrspace(" << AddressSpace << ')';
+ printAddressSpace(M, PTy->getAddressSpace(), OS);
return;
}
case Type::ArrayTyID: {
@@ -3896,10 +3911,10 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
printThreadLocalModel(GV->getThreadLocalMode(), Out);
StringRef UA = getUnnamedAddrEncoding(GV->getUnnamedAddr());
if (!UA.empty())
- Out << UA << ' ';
+ Out << UA << ' ';
- if (unsigned AddressSpace = GV->getType()->getAddressSpace())
- Out << "addrspace(" << AddressSpace << ") ";
+ printAddressSpace(GV->getParent(), GV->getType()->getAddressSpace(), Out,
+ /*Prefix=*/"", /*Suffix=*/" ");
if (GV->isExternallyInitialized()) Out << "externally_initialized ";
Out << (GV->isConstant() ? "constant " : "global ");
TypePrinter.print(GV->getValueType(), Out);
@@ -4174,9 +4189,10 @@ void AssemblyWriter::printFunction(const Function *F) {
// a module with a non-zero program address space or if there is no valid
// Module* so that the file can be parsed without the datalayout string.
const Module *Mod = F->getParent();
- if (F->getAddressSpace() != 0 || !Mod ||
- Mod->getDataLayout().getProgramAddressSpace() != 0)
- Out << " addrspace(" << F->getAddressSpace() << ")";
+ bool ForcePrintAddressSpace =
+ !Mod || Mod->getDataLayout().getProgramAddressSpace() != 0;
+ printAddressSpace(Mod, F->getAddressSpace(), Out, /*Prefix=*/" ",
+ /*Suffix=*/"", ForcePrintAddressSpace);
if (Attrs.hasFnAttrs())
Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttrs());
if (F->hasSection()) {
@@ -4352,23 +4368,21 @@ void AssemblyWriter::printInfoComment(const Value &V, bool isMaterializable) {
static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I,
raw_ostream &Out) {
- // We print the address space of the call if it is non-zero.
if (Operand == nullptr) {
Out << " <cannot get addrspace!>";
return;
}
+
+ // We print the address space of the call if it is non-zero.
+ // We also print it if it is zero but not equal to the program address space
+ // or if we can't find a valid Module* to make it possible to parse
+ // the resulting file even without a datalayout string.
unsigned CallAddrSpace = Operand->getType()->getPointerAddressSpace();
- bool PrintAddrSpace = CallAddrSpace != 0;
- if (!PrintAddrSpace) {
- const Module *Mod = getModuleFromVal(I);
- // We also print it if it is zero but not equal to the program address space
- // or if we can't find a valid Module* to make it possible to parse
- // the resulting file even without a datalayout string.
- if (!Mod || Mod->getDataLayout().getProgramAddressSpace() != 0)
- PrintAddrSpace = true;
- }
- if (PrintAddrSpace)
- Out << " addrspace(" << CallAddrSpace << ")";
+ const Module *Mod = getModuleFromVal(I);
+ bool ForcePrintAddrSpace =
+ !Mod || Mod->getDataLayout().getProgramAddressSpace() != 0;
+ printAddressSpace(Mod, CallAddrSpace, Out, /*Prefix=*/" ", /*Suffix=*/"",
+ ForcePrintAddrSpace);
}
// This member is called for each Instruction in a function..
@@ -4735,9 +4749,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", align " << A->value();
}
- unsigned AddrSpace = AI->getAddressSpace();
- if (AddrSpace != 0)
- Out << ", addrspace(" << AddrSpace << ')';
+ printAddressSpace(AI->getModule(), AI->getAddressSpace(), Out,
+ /*Prefix=*/", ");
} else if (isa<CastInst>(I)) {
if (Operand) {
Out << ' ';
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 49e1f898ca594..a48f548524da1 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -147,12 +147,10 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
PrefAlign == Other.PrefAlign;
}
-bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
- return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
- ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
- IndexBitWidth == Other.IndexBitWidth &&
- HasUnstableRepresentation == Other.HasUnstableRepresentation &&
- HasExternalState == Other.HasExternalState;
+StringRef
+DataLayout::PointerSpec::getAddrSpaceName(const DataLayout &DL) const {
+ return StringRef(DL.StringRepresentation)
+ .substr(AddrSpaceNameOffset, AddrSpaceNameSize);
}
namespace {
@@ -195,7 +193,7 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
// Default pointer type specifications.
constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
// p0:64:64:64:64
- {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false},
+ {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false, 0, 0},
};
DataLayout::DataLayout()
@@ -233,6 +231,15 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
bool DataLayout::operator==(const DataLayout &Other) const {
// NOTE: StringRepresentation might differ, it is not canonicalized.
+ auto IsPointerSpecEqual = [this, &Other](const PointerSpec &A,
+ const PointerSpec &B) {
+ return A.AddrSpace == B.AddrSpace && A.BitWidth == B.BitWidth &&
+ A.ABIAlign == B.ABIAlign && A.PrefAlign == B.PrefAlign &&
+ A.IndexBitWidth == B.IndexBitWidth &&
+ A.HasUnstableRepresentation == B.HasUnstableRepresentation &&
+ A.HasExternalState == B.HasExternalState &&
+ A.getAddrSpaceName(*this) == B.getAddrSpaceName(Other);
+ };
return BigEndian == Other.BigEndian &&
AllocaAddrSpace == Other.AllocaAddrSpace &&
ProgramAddrSpace == Other.ProgramAddrSpace &&
@@ -243,9 +250,9 @@ bool DataLayout::operator==(const DataLayout &Other) const {
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs &&
FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs &&
- PointerSpecs == Other.PointerSpecs &&
StructABIAlignment == Other.StructABIAlignment &&
- StructPrefAlignment == Other.StructPrefAlignment;
+ StructPrefAlignment == Other.StructPrefAlignment &&
+ llvm::equal(PointerSpecs, Other.PointerSpecs, IsPointerSpecEqual);
}
Expected<DataLayout> DataLayout::parse(StringRef LayoutString) {
@@ -271,6 +278,48 @@ static Error parseAddrSpace(StringRef Str, unsigned &AddrSpace) {
return Error::success();
}
+/// Attempts to parse an address space component of a specification allowing
+/// name to be specified as well. The input is expected to be of the form
+/// <number> '(' name ')', where the name is optional and the number is
+/// optional as well.
+static Error parseAddrSpaceAndName(StringRef Str, unsigned &AddrSpace,
+ StringRef &AddrSpaceName) {
+ if (Str.empty())
+ return createStringError("address space component cannot be empty");
+
+ if (isDigit(Str.front())) {
+ if (Str.consumeInteger(10, AddrSpace) || !isUInt<24>(AddrSpace))
+ return createStringError("address space must be a 24-bit integer");
+ }
+
+ if (Str.empty())
+ return Error::success();
+
+ if (Str.front() != '(')
+ return createStringError("address space must be a 24-bit integer");
+
+ // Expect at least one character in between the ( and ).
+ if (Str.back() != ')' || Str.size() == 2)
+ return createStringError("Expected `( address space name )`");
+
+ AddrSpaceName = Str.drop_front().drop_back();
+ // TODO: Do we need any additional verification for address space name? Like
+ // should be a valid identifier of some sort? It's not strictly needed.
+
+ // LLVM's assembly parser used names "P", "G" and "A" to represent the
+ // program, default global, and alloca address space. This mapping is not 1:1
+ // in the sense that all of them can map to the same numeric address space.
+ // Disallow using these predefined symbolic address space names as address
+ // space names specified in the data layout.
+ if (AddrSpaceName.size() == 1) {
+ char C = AddrSpaceName.front();
+ if (C == 'P' || C == 'G' || C == 'A')
+ return createStringError(
+ "Cannot use predefined address space names P/G/A in data layout");
+ }
+ return Error::success();
+}
+
/// Attempts to parse a size component of a specification.
static Error parseSize(StringRef Str, unsigned &BitWidth,
StringRef Name = "size") {
@@ -395,7 +444,8 @@ Error DataLayout::parseAggregateSpec(StringRef Spec) {
return Error::success();
}
-Error DataLayout::parsePointerSpec(StringRef Spec) {
+Error DataLayout::parsePointerSpec(
+ StringRef Spec, SmallDenseSet<StringRef, 8> &AddrSpaceNames) {
// p[<n>]:<size>:<abi>[:<pref>[:<idx>]]
SmallVector<StringRef, 5> Components;
assert(Spec.front() == 'p');
@@ -408,6 +458,7 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
unsigned AddrSpace = 0;
bool ExternalState = false;
bool UnstableRepr = false;
+ StringRef AddrSpaceName;
StringRef AddrSpaceStr = Components[0];
while (!AddrSpaceStr.empty()) {
char C = AddrSpaceStr.front();
@@ -424,12 +475,18 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
AddrSpaceStr = AddrSpaceStr.drop_front(1);
}
if (!AddrSpaceStr.empty())
- if (Error Err = parseAddrSpace(AddrSpaceStr, AddrSpace))
+ if (Error Err =
+ parseAddrSpaceAndName(AddrSpaceStr, AddrSpace, AddrSpaceName))
return Err; // Failed to parse the remaining characters as a number
if (AddrSpace == 0 && (ExternalState || UnstableRepr))
return createStringError(
"address space 0 cannot be unstable or have external state");
+ // Check for duplicate address space names.
+ if (!AddrSpaceName.empty() && !AddrSpaceNames.insert(AddrSpaceName).second)
+ return createStringError("address space name `" + AddrSpaceName +
+ "` already used");
+
// Size. Required, cannot be zero.
unsigned BitWidth;
if (Error Err = parseSize(Components[1], BitWidth, "pointer size"))
@@ -462,12 +519,13 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
"index size cannot be larger than the pointer size");
setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth,
- UnstableRepr, ExternalState);
+ UnstableRepr, ExternalState, AddrSpaceName);
return Error::success();
}
Error DataLayout::parseSpecification(
- StringRef Spec, SmallVectorImpl<unsigned> &NonIntegralAddressSpaces) {
+ StringRef Spec, SmallVectorImpl<unsigned> &NonIntegralAddressSpaces,
+ SmallDenseSet<StringRef, 8> &AddrSpaceNames) {
// The "ni" specifier is the only two-character specifier. Handle it first.
if (Spec.starts_with("ni")) {
// ni:<address space>[:<address space>]...
@@ -499,7 +557,7 @@ Error DataLayout::parseSpecification(
return parseAggregateSpec(Spec);
if (Specifier == 'p')
- return parsePointerSpec(Spec);
+ return parsePointerSpec(Spec, AddrSpaceNames);
StringRef Rest = Spec.drop_front();
switch (Specifier) {
@@ -616,7 +674,7 @@ Error DataLayout::parseSpecification(
}
Error DataLayout::parseLayoutString(StringRef LayoutString) {
- StringRepresentation = std::string(LayoutString);
+ StringRepresentation = LayoutString.str();
if (LayoutString.empty())
return Error::success();
@@ -624,10 +682,12 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
// Split the data layout string into specifications separated by '-' and
// parse each specification individually, updating internal data structures.
SmallVector<unsigned, 8> NonIntegralAddressSpaces;
- for (StringRef Spec : split(LayoutString, '-')) {
+ SmallDenseSet<StringRef, 8> AddessSpaceNames;
+ for (StringRef Spec : split(StringRepresentation, '-')) {
if (Spec.empty())
return createStringError("empty specification is not allowed");
- if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces))
+ if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces,
+ AddessSpaceNames))
return Err;
}
// Mark all address spaces that were qualified as non-integral now. This has
@@ -638,7 +698,8 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
// the spec for AS0, and we then update that to mark it non-integral.
const PointerSpec &PS = getPointerSpec(AS);
setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth,
- /*HasUnstableRepr=*/true, /*HasExternalState=*/false);
+ /*HasUnstableRepr=*/true, /*HasExternalState=*/false,
+ getAddressSpaceName(AS));
}
return Error::success();
@@ -687,12 +748,28 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const {
void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
Align ABIAlign, Align PrefAlign,
uint32_t IndexBitWidth, bool HasUnstableRepr,
- bool HasExternalState) {
+ bool HasExternalState,
+ StringRef AddrSpaceName) {
auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
+ size_t AddrSpaceNameOffset = 0, AddrSpaceNameSize = AddrSpaceName.size();
+ if (!AddrSpaceName.empty()) {
+ // Validate that AddrSpaceName points to data within the
+ // StringRepresentation.
+ const char *RepStart = StringRepresentation.data();
+ const char *ASStart = AddrSpaceName.data();
+ [[maybe_unused]] const char *RepEnd =
+ RepStart + StringRepresentation.size();
+ [[maybe_unused]] const char *ASEnd = ASStart + AddrSpaceNameSize;
+
+ assert(RepStart <= ASStart && ASStart < RepEnd && RepStart < ASEnd &&
+ ASEnd <= RepEnd);
+ AddrSpaceNameOffset = std::distance(RepStart, ASStart);
+ }
if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
IndexBitWidth, HasUnstableRepr,
- HasExternalState});
+ HasExternalState, AddrSpaceNameOffset,
+ AddrSpaceNameSize});
} else {
I->BitWidth = BitWidth;
I->ABIAlign = ABIAlign;
@@ -700,6 +777,8 @@ void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
I->IndexBitWidth = IndexBitWidth;
I->HasUnstableRepresentation = HasUnstableRepr;
I->HasExternalState = HasExternalState;
+ I->AddrSpaceNameOffset = AddrSpaceNameOffset;
+ I->AddrSpaceNameSize = AddrSpaceNameSize;
}
}
@@ -747,6 +826,20 @@ Align DataLayout::getPointerABIAlignment(unsigned AS) const {
return getPointerSpec(AS).ABIAlign;
}
+StringRef DataLayout::getAddressSpaceName(unsigned AS) const {
+ const PointerSpec &PS = getPointerSpec(AS);
+ return PS.getAddrSpaceName(*this);
+}
+
+std::optional<unsigned> DataLayout::getNamedAddressSpace(StringRef Name) const {
+ auto II = llvm::find_if(PointerSpecs, [Name, this](const PointerSpec &PS) {
+ return PS.getAddrSpaceName(*this) == Name;
+ });
+ if (II != PointerSpecs.end())
+ return II->AddrSpace;
+ return std::nullopt;
+}
+
Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
return getPointerSpec(AS).PrefAlign;
}
diff --git a/llvm/test/Assembler/symbolic-addrspace-datalayout.ll b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
new file mode 100644
index 0000000000000..c7473b2662398
--- /dev/null
+++ b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
@@ -0,0 +1,52 @@
+;; Check support for printing and parsing of address space names specified in
+;; the datalayout.
+; RUN: split-file %s %t --leading-lines
+; RUN: llvm-as < %t/num-to-sym.ll | llvm-dis | FileCheck %t/num-to-sym.ll
+; RUN: llvm-as < %t/sym-to-sym.ll | llvm-dis | FileCheck %t/sym-to-sym.ll
+; RUN: not llvm-as < %t/invalid-name.ll 2>&1 | FileCheck %t/invalid-name.ll
+
+;--- num-to-sym.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+
+; CHECK: @str = private addrspace("global") constant [4 x i8] c"str\00"
+@str = private addrspace(2) constant [4 x i8] c"str\00"
+
+define void @foo() {
+ ; CHECK: %alloca = alloca i32, align 4, addrspace("stack")
+ %alloca = alloca i32, addrspace(8)
+ ret void
+}
+
+; CHECK: define void @bar() addrspace("code")
+define void @bar() addrspace(11) {
+ ; CHECK: call addrspace("code") void @foo()
+ call addrspace(11) void @foo()
+ ret void
+}
+
+;--- sym-to-sym.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+
+; CHECK: @str = private addrspace("global") constant [4 x i8] c"str\00"
+@str = private addrspace("global") constant [4 x i8] c"str\00"
+
+define void @foo() {
+ ; CHECK: %alloca = alloca i32, align 4, addrspace("stack")
+ %alloca = alloca i32, addrspace("stack")
+ ret void
+}
+
+; CHECK: define void @bar() addrspace("code")
+define void @bar() addrspace(11) {
+ ; CHECK: call addrspace("code") void @foo()
+ call addrspace("code") void @foo()
+ ret void
+}
+
+;--- invalid-name.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: error: invalid symbolic addrspace 'global3'
+@str = private addrspace("global3") constant [4 x i8] c"str\00"
+
diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp
index 9ca88141ca0eb..5bef6fcd20126 100644
--- a/llvm/unittests/IR/DataLayoutTest.cpp
+++ b/llvm/unittests/IR/DataLayoutTest.cpp
@@ -309,7 +309,8 @@ TEST(DataLayoutTest, ParseAggregateSpec) {
TEST(DataLayout, ParsePointerSpec) {
for (StringRef Str :
{"p:16:8", "p:16:16:64", "p:32:64:64:32", "p0:32:64", "p42:64:32:32",
- "p16777215:32:32:64:8", "p16777215:16777215:32768:32768:16777215"})
+ "p1(global):16:8", "p(generic):32:8", "p16777215:32:32:64:8",
+ "p16777215:16777215:32768:32768:16777215"})
EXPECT_THAT_EXPECTED(DataLayout::parse(Str), Succeeded());
for (StringRef Str :
@@ -422,6 +423,17 @@ TEST(DataLayout, ParsePointerSpec) {
DataLayout::parse(Str),
FailedWithMessage(
"address space 0 cannot be unstable or have external state"));
+
+ // Duplicate address space names not allowed.
+ EXPECT_THAT_EXPECTED(
+ DataLayout::parse("p1(foo):32:32-p2(foo):32:32"),
+ FailedWithMessage("address space name `foo` already used"));
+
+ // Predefined address space names not allowed.
+ EXPECT_THAT_EXPECTED(
+ DataLayout::parse("p1(A):32:32"),
+ FailedWithMessage(
+ "Cannot use predefined address space names P/G/A in data layout"));
}
TEST(DataLayoutTest, ParseNativeIntegersSpec) {
@@ -576,6 +588,15 @@ TEST(DataLayout, GetPointerPrefAlignment) {
}
}
+TEST(DataLayout, AddressSpaceName) {
+ DataLayout DL =
+ cantFail(DataLayout::parse("p:16:32-p1(foo):16:32-p10(bar):16:16"));
+ EXPECT_EQ(DL.getAddressSpaceName(0), "");
+ EXPECT_EQ(DL.getAddressSpaceName(1), "foo");
+ EXPECT_EQ(DL.getAddressSpaceName(10), "bar");
+ EXPECT_EQ(DL.getAddressSpaceName(3), "");
+}
+
TEST(DataLayout, IsNonIntegralAddressSpace) {
const DataLayout Default;
EXPECT_THAT(Default.getNonStandardAddressSpaces(), ::testing::SizeIs(0));
@@ -807,4 +828,15 @@ TEST(DataLayoutTest, VectorAlign) {
EXPECT_EQ(Align(4 * 8), DL->getPrefTypeAlign(V8F32Ty));
}
+TEST(DataLayoutTest, Equality) {
+ const char *Layout0 = "p00(global):32:8";
+ const char *Layout1 = "p0(global):32:8";
+ DataLayout DL0 = cantFail(DataLayout::parse(Layout0));
+ DataLayout DL1 = cantFail(DataLayout::parse(Layout1));
+
+ EXPECT_EQ(DL0.getStringRepresentation(), Layout0);
+ EXPECT_EQ(DL1.getStringRepresentation(), Layout1);
+ EXPECT_EQ(DL0, DL1);
+}
+
} // anonymous namespace
>From 36f457e3f61c55226d95cf2ad8e35ed83538b1c4 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 4 Dec 2025 10:43:12 -0800
Subject: [PATCH 2/3] Review feedback: use std::string for AddrSpaceName
---
llvm/include/llvm/IR/DataLayout.h | 13 ++-----
llvm/lib/IR/DataLayout.cpp | 64 +++++++++----------------------
2 files changed, 23 insertions(+), 54 deletions(-)
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 4b42c1f5a78a6..4d695e1e5c566 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -93,15 +93,10 @@ class DataLayout {
/// of this would be CHERI capabilities where the validity bit is stored
/// separately from the pointer address+bounds information.
bool HasExternalState;
- // Symbolic name of the address space. We can store a StringRef here
- // directly (backed by StringRepresentation) but then the copy construtor
- // for DataLayout has to be updated to redirect these StringRefs to the new
- // copy of StringRepresentation. To avoid that, we store just the offset and
- // size of the address space name within the StringRepresentation.
- size_t AddrSpaceNameOffset;
- size_t AddrSpaceNameSize;
-
- StringRef getAddrSpaceName(const DataLayout &DL) const;
+ // Symbolic name of the address space.
+ std::string AddrSpaceName;
+
+ LLVM_ABI bool operator==(const PointerSpec &Other) const;
};
enum class FunctionPtrAlignType {
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index a48f548524da1..05aa6c29c0e50 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -147,10 +147,13 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
PrefAlign == Other.PrefAlign;
}
-StringRef
-DataLayout::PointerSpec::getAddrSpaceName(const DataLayout &DL) const {
- return StringRef(DL.StringRepresentation)
- .substr(AddrSpaceNameOffset, AddrSpaceNameSize);
+bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
+ return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
+ ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
+ IndexBitWidth == Other.IndexBitWidth &&
+ HasUnstableRepresentation == Other.HasUnstableRepresentation &&
+ HasExternalState == Other.HasExternalState &&
+ AddrSpaceName == Other.AddrSpaceName;
}
namespace {
@@ -190,17 +193,14 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
{128, Align::Constant<16>(), Align::Constant<16>()}, // v128:128:128
};
-// Default pointer type specifications.
-constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
- // p0:64:64:64:64
- {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false, 0, 0},
-};
-
DataLayout::DataLayout()
: IntSpecs(ArrayRef(DefaultIntSpecs)),
FloatSpecs(ArrayRef(DefaultFloatSpecs)),
- VectorSpecs(ArrayRef(DefaultVectorSpecs)),
- PointerSpecs(ArrayRef(DefaultPointerSpecs)) {}
+ VectorSpecs(ArrayRef(DefaultVectorSpecs)) {
+ // Default pointer type specifications.
+ setPointerSpec(0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false,
+ false, "");
+}
DataLayout::DataLayout(StringRef LayoutString) : DataLayout() {
if (Error Err = parseLayoutString(LayoutString))
@@ -231,15 +231,6 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
bool DataLayout::operator==(const DataLayout &Other) const {
// NOTE: StringRepresentation might differ, it is not canonicalized.
- auto IsPointerSpecEqual = [this, &Other](const PointerSpec &A,
- const PointerSpec &B) {
- return A.AddrSpace == B.AddrSpace && A.BitWidth == B.BitWidth &&
- A.ABIAlign == B.ABIAlign && A.PrefAlign == B.PrefAlign &&
- A.IndexBitWidth == B.IndexBitWidth &&
- A.HasUnstableRepresentation == B.HasUnstableRepresentation &&
- A.HasExternalState == B.HasExternalState &&
- A.getAddrSpaceName(*this) == B.getAddrSpaceName(Other);
- };
return BigEndian == Other.BigEndian &&
AllocaAddrSpace == Other.AllocaAddrSpace &&
ProgramAddrSpace == Other.ProgramAddrSpace &&
@@ -250,9 +241,9 @@ bool DataLayout::operator==(const DataLayout &Other) const {
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs &&
FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs &&
+ PointerSpecs == Other.PointerSpecs &&
StructABIAlignment == Other.StructABIAlignment &&
- StructPrefAlignment == Other.StructPrefAlignment &&
- llvm::equal(PointerSpecs, Other.PointerSpecs, IsPointerSpecEqual);
+ StructPrefAlignment == Other.StructPrefAlignment;
}
Expected<DataLayout> DataLayout::parse(StringRef LayoutString) {
@@ -751,25 +742,10 @@ void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
bool HasExternalState,
StringRef AddrSpaceName) {
auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
- size_t AddrSpaceNameOffset = 0, AddrSpaceNameSize = AddrSpaceName.size();
- if (!AddrSpaceName.empty()) {
- // Validate that AddrSpaceName points to data within the
- // StringRepresentation.
- const char *RepStart = StringRepresentation.data();
- const char *ASStart = AddrSpaceName.data();
- [[maybe_unused]] const char *RepEnd =
- RepStart + StringRepresentation.size();
- [[maybe_unused]] const char *ASEnd = ASStart + AddrSpaceNameSize;
-
- assert(RepStart <= ASStart && ASStart < RepEnd && RepStart < ASEnd &&
- ASEnd <= RepEnd);
- AddrSpaceNameOffset = std::distance(RepStart, ASStart);
- }
if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
IndexBitWidth, HasUnstableRepr,
- HasExternalState, AddrSpaceNameOffset,
- AddrSpaceNameSize});
+ HasExternalState, AddrSpaceName.str()});
} else {
I->BitWidth = BitWidth;
I->ABIAlign = ABIAlign;
@@ -777,8 +753,7 @@ void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
I->IndexBitWidth = IndexBitWidth;
I->HasUnstableRepresentation = HasUnstableRepr;
I->HasExternalState = HasExternalState;
- I->AddrSpaceNameOffset = AddrSpaceNameOffset;
- I->AddrSpaceNameSize = AddrSpaceNameSize;
+ I->AddrSpaceName = AddrSpaceName.str();
}
}
@@ -827,13 +802,12 @@ Align DataLayout::getPointerABIAlignment(unsigned AS) const {
}
StringRef DataLayout::getAddressSpaceName(unsigned AS) const {
- const PointerSpec &PS = getPointerSpec(AS);
- return PS.getAddrSpaceName(*this);
+ return getPointerSpec(AS).AddrSpaceName;
}
std::optional<unsigned> DataLayout::getNamedAddressSpace(StringRef Name) const {
- auto II = llvm::find_if(PointerSpecs, [Name, this](const PointerSpec &PS) {
- return PS.getAddrSpaceName(*this) == Name;
+ auto II = llvm::find_if(PointerSpecs, [Name](const PointerSpec &PS) {
+ return PS.AddrSpaceName == Name;
});
if (II != PointerSpecs.end())
return II->AddrSpace;
>From dc036b74988a6503a970d2205803bd770a8b7b40 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Mon, 8 Dec 2025 13:49:14 -0800
Subject: [PATCH 3/3] Update documentation and add option to print address
space names
---
llvm/docs/LangRef.rst | 8 ++++--
llvm/lib/IR/AsmWriter.cpp | 9 ++++++-
.../symbolic-addrspace-datalayout.ll | 25 +++++++++++++++++--
3 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 02865f8a29c67..840537f457cc2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3331,7 +3331,7 @@ as follows:
``A<address space>``
Specifies the address space of objects created by '``alloca``'.
Defaults to the default address space of 0.
-``p[<flags>][<as>]:<size>:<abi>[:<pref>[:<idx>]]``
+``p[<flags>][<as>][(<name>)]:<size>:<abi>[:<pref>[:<idx>]]``
This specifies the properties of a pointer in address space ``as``.
The ``<size>`` parameter specifies the size of the bitwise representation.
For :ref:`non-integral pointers <nointptrtype>` the representation size may
@@ -3350,7 +3350,11 @@ as follows:
The optional ``<flags>`` are used to specify properties of pointers in this
address space: the character ``u`` marks pointers as having an unstable
representation, and ``e`` marks pointers having external state. See
- :ref:`Non-Integral Pointer Types <nointptrtype>`.
+ :ref:`Non-Integral Pointer Types <nointptrtype>`. The ``<name>`` is an
+ optional name of that address space, surrounded by ``(`` and ``)``. If the
+ name is specified, it must be unique to that address space and cannot be
+ ``A``, ``G``, or ``P``, which are pre-defined names that denote the alloca,
+ global, and program address spaces, respectively.
``i<size>:<abi>[:<pref>]``
This specifies the alignment for an integer type of a given bit
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 10577fceee239..a2ac113877179 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -107,6 +107,11 @@ static cl::opt<bool> PreserveAssemblyUseListOrder(
"preserve-ll-uselistorder", cl::Hidden, cl::init(false),
cl::desc("Preserve use-list order when writing LLVM assembly."));
+static cl::opt<bool>
+ PrintSymbolicAddressSpace("print-sym-addr-space", cl::Hidden,
+ cl::init(false),
+ cl::desc("Print symbolic address space names"));
+
// Make virtual table appear in this compilation unit.
AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default;
@@ -638,7 +643,9 @@ static void printAddressSpace(const Module *M, unsigned AS, raw_ostream &OS,
if (AS == 0 && !ForcePrint)
return;
OS << Prefix << "addrspace(";
- StringRef ASName = M ? M->getDataLayout().getAddressSpaceName(AS) : "";
+ StringRef ASName = PrintSymbolicAddressSpace && M
+ ? M->getDataLayout().getAddressSpaceName(AS)
+ : "";
if (!ASName.empty())
OS << "\"" << ASName << "\"";
else
diff --git a/llvm/test/Assembler/symbolic-addrspace-datalayout.ll b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
index c7473b2662398..683df47adb5f0 100644
--- a/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
+++ b/llvm/test/Assembler/symbolic-addrspace-datalayout.ll
@@ -1,8 +1,9 @@
;; Check support for printing and parsing of address space names specified in
;; the datalayout.
; RUN: split-file %s %t --leading-lines
-; RUN: llvm-as < %t/num-to-sym.ll | llvm-dis | FileCheck %t/num-to-sym.ll
-; RUN: llvm-as < %t/sym-to-sym.ll | llvm-dis | FileCheck %t/sym-to-sym.ll
+; RUN: llvm-as < %t/num-to-sym.ll | llvm-dis --print-sym-addr-space=true | FileCheck %t/num-to-sym.ll
+; RUN: llvm-as < %t/sym-to-sym.ll | llvm-dis --print-sym-addr-space=true | FileCheck %t/sym-to-sym.ll
+; RUN: llvm-as < %t/sym-to-num.ll | llvm-dis --print-sym-addr-space=false | FileCheck %t/sym-to-num.ll
; RUN: not llvm-as < %t/invalid-name.ll 2>&1 | FileCheck %t/invalid-name.ll
;--- num-to-sym.ll
@@ -45,6 +46,26 @@ define void @bar() addrspace(11) {
ret void
}
+;--- sym-to-num.ll
+target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+; CHECK: target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
+
+; CHECK: @str = private addrspace(2) constant [4 x i8] c"str\00"
+ at str = private addrspace("global") constant [4 x i8] c"str\00"
+
+define void @foo() {
+ ; CHECK: %alloca = alloca i32, align 4, addrspace(8)
+ %alloca = alloca i32, addrspace("stack")
+ ret void
+}
+
+; CHECK: define void @bar() addrspace(11)
+define void @bar() addrspace(11) {
+ ; CHECK: call addrspace(11) void @foo()
+ call addrspace("code") void @foo()
+ ret void
+}
+
;--- invalid-name.ll
target datalayout = "P11-p2(global):32:8-p8(stack):8:8-p11(code):8:8"
; CHECK: error: invalid symbolic addrspace 'global3'
More information about the llvm-commits
mailing list