[llvm] [DataLayout] Introduce DataLayout::getPointerAddressSize(AS) (PR #137412)
Alexander Richardson via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 23:52:07 PDT 2025
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/137412
>From 110540d7e9104f14cc2ed41c48fb4b8f11c3c38e Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson at google.com>
Date: Fri, 25 Apr 2025 15:55:57 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6-beta.1
---
llvm/docs/LangRef.rst | 25 +++++++++++++++++--------
llvm/include/llvm/IR/DataLayout.h | 29 ++++++++++++++++++++++++++---
llvm/lib/IR/DataLayout.cpp | 26 +++++++++++++++++++++-----
3 files changed, 64 insertions(+), 16 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 33c85c7ba9d29..b89fb3ba56e8b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3134,16 +3134,25 @@ as follows:
``A<address space>``
Specifies the address space of objects created by '``alloca``'.
Defaults to the default address space of 0.
-``p[n]:<size>:<abi>[:<pref>][:<idx>]``
+``p[n]:<size>:<abi>[:<pref>][:<idx>][:<addr>]``
This specifies the *size* of a pointer and its ``<abi>`` and
``<pref>``\erred alignments for address space ``n``. ``<pref>`` is optional
- and defaults to ``<abi>``. The fourth parameter ``<idx>`` is the size of the
- index that used for address calculation, which must be less than or equal
- to the pointer size. If not
- specified, the default index size is equal to the pointer size. All sizes
- are in bits. The address space, ``n``, is optional, and if not specified,
+ and defaults to ``<abi>``.
+ The fourth parameter ``<idx>`` is the size of the index that is used for
+ address calculations such as :ref:`getelementptr <i_getelementptr>`.
+ It must be less than or equal to the pointer size. If not specified, the
+ default index size is equal to the pointer size.
+ The fifth parameter ``<addr>`` specifies the width of addresses in this
+ address space. If not specified, the default address size is equal to the
+ index size. The address size may be wider than either the index or pointer
+ size as it could be a value relative to a base address. For example AMDGPU
+ buffer fat pointers use a 48-bit address range, but only allow for 32 bits
+ of indexing.
+ All sizes are in bits.
+ The address space, ``n``, is optional, and if not specified,
denotes the default address space 0. The value of ``n`` must be
in the range [1,2^24).
+
``i<size>:<abi>[:<pref>]``
This specifies the alignment for an integer type of a given bit
``<size>``. The value of ``<size>`` must be in the range [1,2^24).
@@ -12996,9 +13005,9 @@ This instruction requires several arguments:
- Caller and callee both have the calling convention ``fastcc`` or ``tailcc``.
- The call is in tail position (ret immediately follows call and ret
uses value of call or is void).
- - Option ``-tailcallopt`` is enabled, ``llvm::GuaranteedTailCallOpt`` is
+ - Option ``-tailcallopt`` is enabled, ``llvm::GuaranteedTailCallOpt`` is
``true``, or the calling convention is ``tailcc``.
- - `Platform-specific constraints are met.
+ - `Platform-specific constraints are met.
<CodeGenerator.html#tail-call-optimization>`_
#. The optional ``notail`` marker indicates that the optimizers should not add
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 2ad080e6d0cd2..b6d788f4db66c 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -78,6 +78,7 @@ class DataLayout {
Align ABIAlign;
Align PrefAlign;
uint32_t IndexBitWidth;
+ uint32_t AddressBitWidth;
/// Pointers in this address space don't have a well-defined bitwise
/// representation (e.g. may be relocated by a copying garbage collector).
/// Additionally, they may also be non-integral (i.e. containing additional
@@ -148,7 +149,7 @@ class DataLayout {
/// Sets or updates the specification for pointer in the given address space.
void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
Align PrefAlign, uint32_t IndexBitWidth,
- bool IsNonIntegral);
+ uint32_t AddressBitWidth, bool IsNonIntegral);
/// Internal helper to get alignment for integer of given bitwidth.
Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -324,12 +325,26 @@ class DataLayout {
/// the backends/clients are updated.
Align getPointerPrefAlignment(unsigned AS = 0) const;
- /// Layout pointer size in bytes, rounded up to a whole
- /// number of bytes.
+ /// Layout pointer size in bytes, rounded up to a whole number of bytes. The
+ /// difference between this function and getPointerAddressSize() is this one
+ /// returns the size of the entire pointer type (this includes metadata bits
+ /// for fat pointers) and the latter only returns the number of address bits.
+ /// \sa DataLayout::getPointerAddressSizeInBits
/// FIXME: The defaults need to be removed once all of
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
+ /// Returns the integral size of a pointer in a given address space in bytes.
+ /// For targets that store bits in pointers that are not part of the address,
+ /// this returns the number of bytes that can be manipulated using operations
+ /// that change the address (e.g. addition/subtraction).
+ /// For example, a 64-bit CHERI-enabled target has 128-bit pointers of which
+ /// only 64 are used to represent the address and the remaining ones are used
+ /// for metadata such as bounds and access permissions. In this case
+ /// getPointerSize() returns 16, but getPointerAddressSize() returns 8.
+ /// \sa DataLayout::getPointerSize
+ unsigned getPointerAddressSize(unsigned AS) const;
+
// Index size in bytes used for address calculation,
/// rounded up to a whole number of bytes.
unsigned getIndexSize(unsigned AS) const;
@@ -365,6 +380,14 @@ class DataLayout {
return getPointerSpec(AS).BitWidth;
}
+ unsigned getPointerAddressSizeInBits(unsigned AS) const {
+ // Currently, this returns the same value as getIndexSizeInBits() as this
+ // is correct for all currently known LLVM targets. If another target is
+ // added that has pointer size != pointer range != GEP index width, we can
+ // add a new datalayout field for pointer integral range.
+ return getPointerSpec(AS).AddressBitWidth;
+ }
+
/// Size in bits of index used for address calculation in getelementptr.
unsigned getIndexSizeInBits(unsigned AS) const {
return getPointerSpec(AS).IndexBitWidth;
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 0cf0bfc9702d3..fe618939ead63 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -208,7 +208,7 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
// Default pointer type specifications.
constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
// p0:64:64:64:64
- {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false},
+ {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, 64, false},
};
DataLayout::DataLayout()
@@ -454,8 +454,17 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
return createStringError(
"index size cannot be larger than the pointer size");
+ unsigned AddressBitWidth = BitWidth;
+ if (Components.size() > 4)
+ if (Error Err = parseSize(Components[4], AddressBitWidth, "address size"))
+ return Err;
+
+ if (AddressBitWidth > BitWidth)
+ return createStringError(
+ "address size cannot be larger than the pointer size");
+
setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth,
- false);
+ AddressBitWidth, false);
return Error::success();
}
@@ -631,7 +640,7 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
// the spec for AS0, and we then update that to mark it non-integral.
const PointerSpec &PS = getPointerSpec(AS);
setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth,
- true);
+ PS.AddressBitWidth, true);
}
return Error::success();
@@ -679,16 +688,19 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const {
void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
Align ABIAlign, Align PrefAlign,
- uint32_t IndexBitWidth, bool IsNonIntegral) {
+ uint32_t IndexBitWidth,
+ uint32_t AddressBitWidth, bool IsNonIntegral) {
auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
- IndexBitWidth, IsNonIntegral});
+ IndexBitWidth, AddressBitWidth,
+ IsNonIntegral});
} else {
I->BitWidth = BitWidth;
I->ABIAlign = ABIAlign;
I->PrefAlign = PrefAlign;
I->IndexBitWidth = IndexBitWidth;
+ I->AddressBitWidth = AddressBitWidth;
I->IsNonIntegral = IsNonIntegral;
}
}
@@ -728,6 +740,10 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
return L;
}
+unsigned DataLayout::getPointerAddressSize(unsigned AS) const {
+ return divideCeil(getPointerAddressSizeInBits(AS), 8);
+}
+
Align DataLayout::getPointerABIAlignment(unsigned AS) const {
return getPointerSpec(AS).ABIAlign;
}
>From f8c4f106f1247f1c61928f813748d7b0e0232023 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson at google.com>
Date: Fri, 25 Apr 2025 16:22:46 -0700
Subject: [PATCH 2/2] add tests
Created using spr 1.3.6-beta.1
---
llvm/docs/LangRef.rst | 7 ++--
llvm/include/llvm/IR/DataLayout.h | 8 ++--
llvm/lib/IR/DataLayout.cpp | 12 +++---
llvm/unittests/IR/DataLayoutTest.cpp | 61 ++++++++++++++++++++++++++--
4 files changed, 72 insertions(+), 16 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b89fb3ba56e8b..213f3f7e0e5f5 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3144,10 +3144,9 @@ as follows:
default index size is equal to the pointer size.
The fifth parameter ``<addr>`` specifies the width of addresses in this
address space. If not specified, the default address size is equal to the
- index size. The address size may be wider than either the index or pointer
- size as it could be a value relative to a base address. For example AMDGPU
- buffer fat pointers use a 48-bit address range, but only allow for 32 bits
- of indexing.
+ index size. The address size may be wider than the index size as it could be
+ calculated relative to a base address. For example AMDGPU buffer fat
+ pointers use a 48-bit address range, but only allow for 32 bits of indexing.
All sizes are in bits.
The address space, ``n``, is optional, and if not specified,
denotes the default address space 0. The value of ``n`` must be
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index b6d788f4db66c..3e7f3b79c0a8c 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -380,11 +380,11 @@ class DataLayout {
return getPointerSpec(AS).BitWidth;
}
+ /// The size of an address in bits for the given AS. This is usually the
+ /// same size as the index width but in some cases (e.g. AMDGPU buffer fat
+ /// pointers with 48-bit addresses and 32-bit offsets), the address size can
+ /// be larger than the valid range of indexing.
unsigned getPointerAddressSizeInBits(unsigned AS) const {
- // Currently, this returns the same value as getIndexSizeInBits() as this
- // is correct for all currently known LLVM targets. If another target is
- // added that has pointer size != pointer range != GEP index width, we can
- // add a new datalayout field for pointer integral range.
return getPointerSpec(AS).AddressBitWidth;
}
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index fe618939ead63..d818ae8c42371 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -152,6 +152,7 @@ bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
IndexBitWidth == Other.IndexBitWidth &&
+ AddressBitWidth == Other.AddressBitWidth &&
IsNonIntegral == Other.IsNonIntegral;
}
@@ -414,8 +415,9 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
assert(Spec.front() == 'p');
Spec.drop_front().split(Components, ':');
- if (Components.size() < 3 || Components.size() > 5)
- return createSpecFormatError("p[<n>]:<size>:<abi>[:<pref>[:<idx>]]");
+ if (Components.size() < 3 || Components.size() > 6)
+ return createSpecFormatError(
+ "p[<n>]:<size>:<abi>[:<pref>[:<idx>[:<addr>]]]");
// Address space. Optional, defaults to 0.
unsigned AddrSpace = 0;
@@ -454,9 +456,9 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
return createStringError(
"index size cannot be larger than the pointer size");
- unsigned AddressBitWidth = BitWidth;
- if (Components.size() > 4)
- if (Error Err = parseSize(Components[4], AddressBitWidth, "address size"))
+ unsigned AddressBitWidth = IndexBitWidth;
+ if (Components.size() > 5)
+ if (Error Err = parseSize(Components[5], AddressBitWidth, "address size"))
return Err;
if (AddressBitWidth > BitWidth)
diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp
index 16a603ff6416f..02a87a2a77d80 100644
--- a/llvm/unittests/IR/DataLayoutTest.cpp
+++ b/llvm/unittests/IR/DataLayoutTest.cpp
@@ -312,12 +312,12 @@ TEST(DataLayout, ParsePointerSpec) {
"p16777215:32:32:64:8", "p16777215:16777215:32768:32768:16777215"})
EXPECT_THAT_EXPECTED(DataLayout::parse(Str), Succeeded());
- for (StringRef Str :
- {"p", "p0", "p:32", "p0:32", "p:32:32:32:32:32", "p0:32:32:32:32:32"})
+ for (StringRef Str : {"p", "p0", "p:32", "p0:32", "p:32:32:32:32:32:32",
+ "p0:32:32:32:32:32:32"})
EXPECT_THAT_EXPECTED(
DataLayout::parse(Str),
FailedWithMessage("malformed specification, must be of the form "
- "\"p[<n>]:<size>:<abi>[:<pref>[:<idx>]]\""));
+ "\"p[<n>]:<size>:<abi>[:<pref>[:<idx>[:<addr>]]]\""));
// address space
for (StringRef Str : {"p0x0:32:32", "px:32:32:32", "p16777216:32:32:32:32"})
@@ -401,6 +401,27 @@ TEST(DataLayout, ParsePointerSpec) {
EXPECT_THAT_EXPECTED(
DataLayout::parse(Str),
FailedWithMessage("index size cannot be larger than the pointer size"));
+
+ // address size
+ for (StringRef Str : {"p:64:32:32:64:", "p0:64:32:32:64:"})
+ EXPECT_THAT_EXPECTED(
+ DataLayout::parse(Str),
+ FailedWithMessage("address size component cannot be empty"));
+
+ // Note: in the future we might allow 0 for address size to indicate pointers
+ // that do not have a meaningful address (e.g. relocatable GC pointers).
+ for (StringRef Str :
+ {"p:32:32:32:32:0", "p0:32:32:32:32:0x20", "p42:32:32:32:32:16777216"})
+ EXPECT_THAT_EXPECTED(
+ DataLayout::parse(Str),
+ FailedWithMessage("address size must be a non-zero 24-bit integer"));
+
+ for (StringRef Str :
+ {"p:16:16:16:16:17", "p0:32:64:64:32:64", "p42:16:64:64:16:32"})
+ EXPECT_THAT_EXPECTED(
+ DataLayout::parse(Str),
+ FailedWithMessage(
+ "address size cannot be larger than the pointer size"));
}
TEST(DataLayoutTest, ParseNativeIntegersSpec) {
@@ -523,6 +544,40 @@ TEST(DataLayout, GetIndexSize) {
}
}
+TEST(DataLayout, GetAddressSizeInBits) {
+ // Address size defaults to index size
+ std::tuple<StringRef, unsigned, unsigned, unsigned> Cases[] = {
+ {"", 64, 64, 64},
+ {"p:16:32", 16, 16, 16},
+ {"p0:32:64", 32, 32, 32},
+ {"p1:16:32:32:10", 64, 10, 64},
+ {"p1:31:32:64:10:20-p2:17:16:16:16:15", 64, 20, 15},
+ };
+ for (auto [Layout, V0, V1, V2] : Cases) {
+ DataLayout DL = cantFail(DataLayout::parse(Layout));
+ EXPECT_EQ(DL.getPointerAddressSizeInBits(0), V0) << Layout;
+ EXPECT_EQ(DL.getPointerAddressSizeInBits(1), V1) << Layout;
+ EXPECT_EQ(DL.getPointerAddressSizeInBits(2), V2) << Layout;
+ }
+}
+
+TEST(DataLayout, GetAddressSize) {
+ // Address size defaults to index size
+ std::tuple<StringRef, unsigned, unsigned, unsigned> Cases[] = {
+ {"", 8, 8, 8},
+ {"p:16:32", 2, 2, 2},
+ {"p0:27:64", 4, 4, 4},
+ {"p1:19:32:64:19:5", 8, 1, 8},
+ {"p1:33:32:64:33:23-p2:21:8:16:21:13", 8, 3, 2},
+ };
+ for (auto [Layout, V0, V1, V2] : Cases) {
+ DataLayout DL = cantFail(DataLayout::parse(Layout));
+ EXPECT_EQ(DL.getPointerAddressSize(0), V0) << Layout;
+ EXPECT_EQ(DL.getPointerAddressSize(1), V1) << Layout;
+ EXPECT_EQ(DL.getPointerAddressSize(2), V2) << Layout;
+ }
+}
+
TEST(DataLayout, GetPointerABIAlignment) {
std::tuple<StringRef, unsigned, unsigned, unsigned> Cases[] = {
{"", 8, 8, 8},
More information about the llvm-commits
mailing list