[llvm] [DataLayout] Introduce DataLayout::getPointerAddressSize(AS) (PR #137412)

Alexander Richardson via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 30 11:57:54 PDT 2025


https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/137412

>From 110540d7e9104f14cc2ed41c48fb4b8f11c3c38e Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson at google.com>
Date: Fri, 25 Apr 2025 15:55:57 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 llvm/docs/LangRef.rst             | 25 +++++++++++++++++--------
 llvm/include/llvm/IR/DataLayout.h | 29 ++++++++++++++++++++++++++---
 llvm/lib/IR/DataLayout.cpp        | 26 +++++++++++++++++++++-----
 3 files changed, 64 insertions(+), 16 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 33c85c7ba9d29..b89fb3ba56e8b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3134,16 +3134,25 @@ as follows:
 ``A<address space>``
     Specifies the address space of objects created by '``alloca``'.
     Defaults to the default address space of 0.
-``p[n]:<size>:<abi>[:<pref>][:<idx>]``
+``p[n]:<size>:<abi>[:<pref>][:<idx>][:<addr>]``
     This specifies the *size* of a pointer and its ``<abi>`` and
     ``<pref>``\erred alignments for address space ``n``. ``<pref>`` is optional
-    and defaults to ``<abi>``. The fourth parameter ``<idx>`` is the size of the
-    index that used for address calculation, which must be less than or equal
-    to the pointer size. If not
-    specified, the default index size is equal to the pointer size. All sizes
-    are in bits. The address space, ``n``, is optional, and if not specified,
+    and defaults to ``<abi>``.
+    The fourth parameter ``<idx>`` is the size of the index that is used for
+    address calculations such as :ref:`getelementptr <i_getelementptr>`.
+    It must be less than or equal to the pointer size. If not specified, the
+    default index size is equal to the pointer size.
+    The fifth parameter ``<addr>`` specifies the width of addresses in this
+    address space. If not specified, the default address size is equal to the
+    index size. The address size may be wider than either the index or pointer
+    size as it could be a value relative to a base address. For example AMDGPU
+    buffer fat pointers use a 48-bit address range, but only allow for 32 bits
+    of indexing.
+    All sizes are in bits.
+    The address space, ``n``, is optional, and if not specified,
     denotes the default address space 0. The value of ``n`` must be
     in the range [1,2^24).
+
 ``i<size>:<abi>[:<pref>]``
     This specifies the alignment for an integer type of a given bit
     ``<size>``. The value of ``<size>`` must be in the range [1,2^24).
@@ -12996,9 +13005,9 @@ This instruction requires several arguments:
    -  Caller and callee both have the calling convention ``fastcc`` or ``tailcc``.
    -  The call is in tail position (ret immediately follows call and ret
       uses value of call or is void).
-   -  Option ``-tailcallopt`` is enabled, ``llvm::GuaranteedTailCallOpt`` is 
+   -  Option ``-tailcallopt`` is enabled, ``llvm::GuaranteedTailCallOpt`` is
       ``true``, or the calling convention is ``tailcc``.
-   -  `Platform-specific constraints are met. 
+   -  `Platform-specific constraints are met.
       <CodeGenerator.html#tail-call-optimization>`_
 
 #. The optional ``notail`` marker indicates that the optimizers should not add
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 2ad080e6d0cd2..b6d788f4db66c 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -78,6 +78,7 @@ class DataLayout {
     Align ABIAlign;
     Align PrefAlign;
     uint32_t IndexBitWidth;
+    uint32_t AddressBitWidth;
     /// Pointers in this address space don't have a well-defined bitwise
     /// representation (e.g. may be relocated by a copying garbage collector).
     /// Additionally, they may also be non-integral (i.e. containing additional
@@ -148,7 +149,7 @@ class DataLayout {
   /// Sets or updates the specification for pointer in the given address space.
   void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
                       Align PrefAlign, uint32_t IndexBitWidth,
-                      bool IsNonIntegral);
+                      uint32_t AddressBitWidth, bool IsNonIntegral);
 
   /// Internal helper to get alignment for integer of given bitwidth.
   Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -324,12 +325,26 @@ class DataLayout {
   /// the backends/clients are updated.
   Align getPointerPrefAlignment(unsigned AS = 0) const;
 
-  /// Layout pointer size in bytes, rounded up to a whole
-  /// number of bytes.
+  /// Layout pointer size in bytes, rounded up to a whole number of bytes. The
+  /// difference between this function and getPointerAddressSize() is that this
+  /// one returns the size of the entire pointer type (this includes metadata
+  /// bits for fat pointers) and the latter only the number of address bits.
+  /// \sa DataLayout::getPointerAddressSizeInBits
   /// FIXME: The defaults need to be removed once all of
   /// the backends/clients are updated.
   unsigned getPointerSize(unsigned AS = 0) const;
 
+  /// Returns the integral size of a pointer in a given address space in bytes.
+  /// For targets that store bits in pointers that are not part of the address,
+  /// this returns the size of the part of the pointer that can be manipulated
+  /// using operations that change the address (e.g. addition/subtraction).
+  /// For example, a 64-bit CHERI-enabled target has 128-bit pointers of which
+  /// only 64 are used to represent the address and the remaining ones are used
+  /// for metadata such as bounds and access permissions. In this case
+  /// getPointerSize() returns 16, but getPointerAddressSize() returns 8.
+  /// \sa DataLayout::getPointerSize
+  unsigned getPointerAddressSize(unsigned AS) const;
+
   // Index size in bytes used for address calculation,
   /// rounded up to a whole number of bytes.
   unsigned getIndexSize(unsigned AS) const;
@@ -365,6 +380,14 @@ class DataLayout {
     return getPointerSpec(AS).BitWidth;
   }
 
+  unsigned getPointerAddressSizeInBits(unsigned AS) const {
+    // Currently, this returns the same value as getIndexSizeInBits() as this
+    // is correct for all currently known LLVM targets. If another target is
+    // added that has pointer size != pointer range != GEP index width, we can
+    // add a new datalayout field for pointer integral range.
+    return getPointerSpec(AS).AddressBitWidth;
+  }
+
   /// Size in bits of index used for address calculation in getelementptr.
   unsigned getIndexSizeInBits(unsigned AS) const {
     return getPointerSpec(AS).IndexBitWidth;
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 0cf0bfc9702d3..fe618939ead63 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -208,7 +208,7 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
 // Default pointer type specifications.
 constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
     // p0:64:64:64:64
-    {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false},
+    {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, 64, false},
 };
 
 DataLayout::DataLayout()
@@ -454,8 +454,17 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
     return createStringError(
         "index size cannot be larger than the pointer size");
 
+  unsigned AddressBitWidth = BitWidth;
+  if (Components.size() > 4)
+    if (Error Err = parseSize(Components[4], AddressBitWidth, "address size"))
+      return Err;
+
+  if (AddressBitWidth > BitWidth)
+    return createStringError(
+        "address size cannot be larger than the pointer size");
+
   setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth,
-                 false);
+                 AddressBitWidth, false);
   return Error::success();
 }
 
@@ -631,7 +640,7 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
     // the spec for AS0, and we then update that to mark it non-integral.
     const PointerSpec &PS = getPointerSpec(AS);
     setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth,
-                   true);
+                   PS.AddressBitWidth, true);
   }
 
   return Error::success();
@@ -679,16 +688,19 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const {
 
 void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
                                 Align ABIAlign, Align PrefAlign,
-                                uint32_t IndexBitWidth, bool IsNonIntegral) {
+                                uint32_t IndexBitWidth,
+                                uint32_t AddressBitWidth, bool IsNonIntegral) {
   auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
   if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
     PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
-                                       IndexBitWidth, IsNonIntegral});
+                                       IndexBitWidth, AddressBitWidth,
+                                       IsNonIntegral});
   } else {
     I->BitWidth = BitWidth;
     I->ABIAlign = ABIAlign;
     I->PrefAlign = PrefAlign;
     I->IndexBitWidth = IndexBitWidth;
+    I->AddressBitWidth = AddressBitWidth;
     I->IsNonIntegral = IsNonIntegral;
   }
 }
@@ -728,6 +740,10 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
   return L;
 }
 
+unsigned DataLayout::getPointerAddressSize(unsigned AS) const {
+  return divideCeil(getPointerAddressSizeInBits(AS), 8);
+}
+
 Align DataLayout::getPointerABIAlignment(unsigned AS) const {
   return getPointerSpec(AS).ABIAlign;
 }

>From f8c4f106f1247f1c61928f813748d7b0e0232023 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson at google.com>
Date: Fri, 25 Apr 2025 16:22:46 -0700
Subject: [PATCH 2/2] add tests

Created using spr 1.3.6-beta.1
---
 llvm/docs/LangRef.rst                |  7 ++--
 llvm/include/llvm/IR/DataLayout.h    |  8 ++--
 llvm/lib/IR/DataLayout.cpp           | 12 +++---
 llvm/unittests/IR/DataLayoutTest.cpp | 61 ++++++++++++++++++++++++++--
 4 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b89fb3ba56e8b..213f3f7e0e5f5 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3144,10 +3144,9 @@ as follows:
     default index size is equal to the pointer size.
     The fifth parameter ``<addr>`` specifies the width of addresses in this
     address space. If not specified, the default address size is equal to the
-    index size. The address size may be wider than either the index or pointer
-    size as it could be a value relative to a base address. For example AMDGPU
-    buffer fat pointers use a 48-bit address range, but only allow for 32 bits
-    of indexing.
+    index size. The address size may be wider than the index size as it could be
+    calculated relative to a base address. For example AMDGPU buffer fat
+    pointers use a 48-bit address range, but only allow for 32 bits of indexing.
     All sizes are in bits.
     The address space, ``n``, is optional, and if not specified,
     denotes the default address space 0. The value of ``n`` must be
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index b6d788f4db66c..3e7f3b79c0a8c 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -380,11 +380,11 @@ class DataLayout {
     return getPointerSpec(AS).BitWidth;
   }
 
+  /// The size of an address in bits for the given AS. This is usually the same
+  /// as the index width but in some cases (e.g. AMDGPU buffer fat pointers with
+  /// 48-bit addresses and 32-bit offsets), the address size can be larger than
+  /// the valid range of indexing.
   unsigned getPointerAddressSizeInBits(unsigned AS) const {
-    // Currently, this returns the same value as getIndexSizeInBits() as this
-    // is correct for all currently known LLVM targets. If another target is
-    // added that has pointer size != pointer range != GEP index width, we can
-    // add a new datalayout field for pointer integral range.
     return getPointerSpec(AS).AddressBitWidth;
   }
 
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index fe618939ead63..d818ae8c42371 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -152,6 +152,7 @@ bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
   return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
          ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
          IndexBitWidth == Other.IndexBitWidth &&
+         AddressBitWidth == Other.AddressBitWidth &&
          IsNonIntegral == Other.IsNonIntegral;
 }
 
@@ -414,8 +415,9 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
   assert(Spec.front() == 'p');
   Spec.drop_front().split(Components, ':');
 
-  if (Components.size() < 3 || Components.size() > 5)
-    return createSpecFormatError("p[<n>]:<size>:<abi>[:<pref>[:<idx>]]");
+  if (Components.size() < 3 || Components.size() > 6)
+    return createSpecFormatError(
+        "p[<n>]:<size>:<abi>[:<pref>[:<idx>[:<addr>]]]");
 
   // Address space. Optional, defaults to 0.
   unsigned AddrSpace = 0;
@@ -454,9 +456,9 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
     return createStringError(
         "index size cannot be larger than the pointer size");
 
-  unsigned AddressBitWidth = BitWidth;
-  if (Components.size() > 4)
-    if (Error Err = parseSize(Components[4], AddressBitWidth, "address size"))
+  unsigned AddressBitWidth = IndexBitWidth;
+  if (Components.size() > 5)
+    if (Error Err = parseSize(Components[5], AddressBitWidth, "address size"))
       return Err;
 
   if (AddressBitWidth > BitWidth)
diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp
index 16a603ff6416f..02a87a2a77d80 100644
--- a/llvm/unittests/IR/DataLayoutTest.cpp
+++ b/llvm/unittests/IR/DataLayoutTest.cpp
@@ -312,12 +312,12 @@ TEST(DataLayout, ParsePointerSpec) {
         "p16777215:32:32:64:8", "p16777215:16777215:32768:32768:16777215"})
     EXPECT_THAT_EXPECTED(DataLayout::parse(Str), Succeeded());
 
-  for (StringRef Str :
-       {"p", "p0", "p:32", "p0:32", "p:32:32:32:32:32", "p0:32:32:32:32:32"})
+  for (StringRef Str : {"p", "p0", "p:32", "p0:32", "p:32:32:32:32:32:32",
+                        "p0:32:32:32:32:32:32"})
     EXPECT_THAT_EXPECTED(
         DataLayout::parse(Str),
         FailedWithMessage("malformed specification, must be of the form "
-                          "\"p[<n>]:<size>:<abi>[:<pref>[:<idx>]]\""));
+                          "\"p[<n>]:<size>:<abi>[:<pref>[:<idx>[:<addr>]]]\""));
 
   // address space
   for (StringRef Str : {"p0x0:32:32", "px:32:32:32", "p16777216:32:32:32:32"})
@@ -401,6 +401,27 @@ TEST(DataLayout, ParsePointerSpec) {
     EXPECT_THAT_EXPECTED(
         DataLayout::parse(Str),
         FailedWithMessage("index size cannot be larger than the pointer size"));
+
+  // address size
+  for (StringRef Str : {"p:64:32:32:64:", "p0:64:32:32:64:"})
+    EXPECT_THAT_EXPECTED(
+        DataLayout::parse(Str),
+        FailedWithMessage("address size component cannot be empty"));
+
+  // Note: in the future we might allow 0 for address size to indicate pointers
+  // that do not have a meaningful address (e.g. relocatable GC pointers).
+  for (StringRef Str :
+       {"p:32:32:32:32:0", "p0:32:32:32:32:0x20", "p42:32:32:32:32:16777216"})
+    EXPECT_THAT_EXPECTED(
+        DataLayout::parse(Str),
+        FailedWithMessage("address size must be a non-zero 24-bit integer"));
+
+  for (StringRef Str :
+       {"p:16:16:16:16:17", "p0:32:64:64:32:64", "p42:16:64:64:16:32"})
+    EXPECT_THAT_EXPECTED(
+        DataLayout::parse(Str),
+        FailedWithMessage(
+            "address size cannot be larger than the pointer size"));
 }
 
 TEST(DataLayoutTest, ParseNativeIntegersSpec) {
@@ -523,6 +544,40 @@ TEST(DataLayout, GetIndexSize) {
   }
 }
 
+TEST(DataLayout, GetAddressSizeInBits) {
+  // Address size defaults to index size
+  std::tuple<StringRef, unsigned, unsigned, unsigned> Cases[] = {
+      {"", 64, 64, 64},
+      {"p:16:32", 16, 16, 16},
+      {"p0:32:64", 32, 32, 32},
+      {"p1:16:32:32:10", 64, 10, 64},
+      {"p1:31:32:64:10:20-p2:17:16:16:16:15", 64, 20, 15},
+  };
+  for (auto [Layout, V0, V1, V2] : Cases) {
+    DataLayout DL = cantFail(DataLayout::parse(Layout));
+    EXPECT_EQ(DL.getPointerAddressSizeInBits(0), V0) << Layout;
+    EXPECT_EQ(DL.getPointerAddressSizeInBits(1), V1) << Layout;
+    EXPECT_EQ(DL.getPointerAddressSizeInBits(2), V2) << Layout;
+  }
+}
+
+TEST(DataLayout, GetAddressSize) {
+  // Address size defaults to index size
+  std::tuple<StringRef, unsigned, unsigned, unsigned> Cases[] = {
+      {"", 8, 8, 8},
+      {"p:16:32", 2, 2, 2},
+      {"p0:27:64", 4, 4, 4},
+      {"p1:19:32:64:19:5", 8, 1, 8},
+      {"p1:33:32:64:33:23-p2:21:8:16:21:13", 8, 3, 2},
+  };
+  for (auto [Layout, V0, V1, V2] : Cases) {
+    DataLayout DL = cantFail(DataLayout::parse(Layout));
+    EXPECT_EQ(DL.getPointerAddressSize(0), V0) << Layout;
+    EXPECT_EQ(DL.getPointerAddressSize(1), V1) << Layout;
+    EXPECT_EQ(DL.getPointerAddressSize(2), V2) << Layout;
+  }
+}
+
 TEST(DataLayout, GetPointerABIAlignment) {
   std::tuple<StringRef, unsigned, unsigned, unsigned> Cases[] = {
       {"", 8, 8, 8},



More information about the llvm-commits mailing list