[llvm] [DataLayout] Add byte specification (PR #106536)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 26 17:56:47 PDT 2024


https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106536

>From 31ee84fefa96eeb54c72d74d359e6a91726b3417 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Tue, 20 Aug 2024 21:36:15 +0300
Subject: [PATCH] [DataLayout] Add byte specification

This patch adds a byte specification to the data layout string.
The specification is `b:<size>`, where `<size>` is the size of a byte
in bits (later referred to as "byte width").

Limitations:
* The only values allowed for byte width are 8, 16, and 32.
16-bit bytes are popular, and my downstream target has 32-bit bytes.
These are the widths I'm going to add tests for in follow-up patches,
so this restriction only exists because other widths are untested.
* It is assumed that bytes are the same in all address spaces.
Supporting different byte widths in different address spaces would
require adding an address space argument to all DataLayout methods
that query ABI / preferred alignments because they return *byte*
alignments, and those will be different for different address spaces.
This is too much effort, but it can be done in the future if the need
arises; the specification reserves a place for the address space number
before ':'.
---
 llvm/include/llvm/IR/DataLayout.h | 24 ++++++---
 llvm/lib/IR/DataLayout.cpp        | 88 ++++++++++++++++++++++++-------
 2 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 93bd519f5727d8..9be2be9ef0b5ee 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -95,6 +95,9 @@ class DataLayout {
 private:
   bool BigEndian = false;
 
+  /// The size of a byte in bits.
+  unsigned ByteWidth = 8;
+
   unsigned AllocaAddrSpace = 0;
   unsigned ProgramAddrSpace = 0;
   unsigned DefaultGlobalsAddrSpace = 0;
@@ -156,6 +159,9 @@ class DataLayout {
   /// Internal helper method that returns requested alignment for type.
   Align getAlignment(Type *Ty, bool abi_or_pref) const;
 
+  /// Attempts to parse a byte specification ('b').
+  Error parseByteSpec(StringRef Spec);
+
   /// Attempts to parse primitive specification ('i', 'f', or 'v').
   Error parsePrimitiveSpec(StringRef Spec);
 
@@ -197,6 +203,9 @@ class DataLayout {
   bool isLittleEndian() const { return !BigEndian; }
   bool isBigEndian() const { return BigEndian; }
 
+  /// Returns the size of a byte, in bits.
+  unsigned getByteWidth() const { return ByteWidth; }
+
   /// Returns the string representation of the DataLayout.
   ///
   /// This representation is in the same format accepted by the string
@@ -370,7 +379,7 @@ class DataLayout {
 
   /// Returns the maximum index size over all address spaces.
   unsigned getMaxIndexSizeInBits() const {
-    return getMaxIndexSize() * 8;
+    return getMaxIndexSize() * ByteWidth;
   }
 
   /// Size in bits of index used for address calculation in getelementptr.
@@ -390,7 +399,7 @@ class DataLayout {
   unsigned getIndexTypeSizeInBits(Type *Ty) const;
 
   unsigned getPointerTypeSize(Type *Ty) const {
-    return getPointerTypeSizeInBits(Ty) / 8;
+    return getPointerTypeSizeInBits(Ty) / ByteWidth;
   }
 
   /// Size examples:
@@ -428,7 +437,7 @@ class DataLayout {
   /// For example, returns 5 for i36 and 10 for x86_fp80.
   TypeSize getTypeStoreSize(Type *Ty) const {
     TypeSize StoreSizeInBits = getTypeStoreSizeInBits(Ty);
-    return {StoreSizeInBits.getKnownMinValue() / 8,
+    return {StoreSizeInBits.getKnownMinValue() / ByteWidth,
             StoreSizeInBits.isScalable()};
   }
 
@@ -442,7 +451,7 @@ class DataLayout {
   TypeSize getTypeStoreSizeInBits(Type *Ty) const {
     TypeSize BaseSize = getTypeSizeInBits(Ty);
     uint64_t AlignedSizeInBits =
-        alignToPowerOf2(BaseSize.getKnownMinValue(), 8);
+        alignToPowerOf2(BaseSize.getKnownMinValue(), ByteWidth);
     return {AlignedSizeInBits, BaseSize.isScalable()};
   }
 
@@ -476,7 +485,7 @@ class DataLayout {
   /// This is the amount that alloca reserves for this type. For example,
   /// returns 96 or 128 for x86_fp80, depending on alignment.
   TypeSize getTypeAllocSizeInBits(Type *Ty) const {
-    return 8 * getTypeAllocSize(Ty);
+    return getTypeAllocSize(Ty) * ByteWidth;
   }
 
   /// Returns the minimum ABI-required alignment for the specified type.
@@ -575,13 +584,14 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) {
 class StructLayout final : public TrailingObjects<StructLayout, TypeSize> {
   TypeSize StructSize;
   Align StructAlignment;
+  unsigned ByteWidth;
   unsigned IsPadded : 1;
   unsigned NumElements : 31;
 
 public:
   TypeSize getSizeInBytes() const { return StructSize; }
 
-  TypeSize getSizeInBits() const { return 8 * StructSize; }
+  TypeSize getSizeInBits() const { return StructSize * ByteWidth; }
 
   Align getAlignment() const { return StructAlignment; }
 
@@ -607,7 +617,7 @@ class StructLayout final : public TrailingObjects<StructLayout, TypeSize> {
   }
 
   TypeSize getElementOffsetInBits(unsigned Idx) const {
-    return getElementOffset(Idx) * 8;
+    return getElementOffset(Idx) * ByteWidth;
   }
 
 private:
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index a4af0ead07cf61..ae039833d13c8d 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -46,7 +46,7 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
-    : StructSize(TypeSize::getFixed(0)) {
+    : StructSize(TypeSize::getFixed(0)), ByteWidth(DL.getByteWidth()) {
   assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
   IsPadded = false;
   NumElements = ST->getNumElements();
@@ -227,6 +227,7 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
   LayoutMap = nullptr;
   StringRepresentation = Other.StringRepresentation;
   BigEndian = Other.BigEndian;
+  ByteWidth = Other.ByteWidth;
   AllocaAddrSpace = Other.AllocaAddrSpace;
   ProgramAddrSpace = Other.ProgramAddrSpace;
   DefaultGlobalsAddrSpace = Other.DefaultGlobalsAddrSpace;
@@ -246,7 +247,7 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
 
 bool DataLayout::operator==(const DataLayout &Other) const {
   // NOTE: StringRepresentation might differ, it is not canonicalized.
-  return BigEndian == Other.BigEndian &&
+  return BigEndian == Other.BigEndian && ByteWidth == Other.ByteWidth &&
          AllocaAddrSpace == Other.AllocaAddrSpace &&
          ProgramAddrSpace == Other.ProgramAddrSpace &&
          DefaultGlobalsAddrSpace == Other.DefaultGlobalsAddrSpace &&
@@ -308,7 +309,7 @@ static Error parseSize(StringRef Str, unsigned &BitWidth,
 /// - the value is not a multiple of the byte width;
 /// - the value converted to byte amount is not not a power of two.
 static Error parseAlignment(StringRef Str, Align &Alignment, StringRef Name,
-                            bool AllowZero = false) {
+                            unsigned ByteWidth, bool AllowZero = false) {
   if (Str.empty())
     return createStringError(Name + " alignment component cannot be empty");
 
@@ -323,7 +324,6 @@ static Error parseAlignment(StringRef Str, Align &Alignment, StringRef Name,
     return Error::success();
   }
 
-  constexpr unsigned ByteWidth = 8;
   if (Value % ByteWidth || !isPowerOf2_32(Value / ByteWidth))
     return createStringError(
         Name + " alignment must be a power of two times the byte width");
@@ -332,6 +332,36 @@ static Error parseAlignment(StringRef Str, Align &Alignment, StringRef Name,
   return Error::success();
 }
 
+Error DataLayout::parseByteSpec(StringRef Spec) {
+  // b:<size>
+  assert(Spec.front() == 'b');
+  StringRef Rest = Spec.drop_front();
+  if (!Rest.consume_front(":") || Rest.empty())
+    return createSpecFormatError("b:<size>");
+
+  if (Error Err = parseSize(Rest, ByteWidth))
+    return Err;
+
+  if (ByteWidth != 8 && ByteWidth != 16 && ByteWidth != 32)
+    return createStringError("unsupported byte width");
+
+  // The default specs are for targets with 8-bit bytes. If the explicitly
+  // specified byte width is different from 8, reset the default values.
+  if (ByteWidth != 8) {
+    // Byte-sized integers must be one byte aligned. It's hard to guess
+    // reasonable defaults for other types, so just don't provide them
+    // and expect the target to do it.
+    IntSpecs.assign({PrimitiveSpec{ByteWidth, Align(1), Align(1)}});
+    FloatSpecs.clear();
+    VectorSpecs.clear();
+    PointerSpecs.clear();
+    StructABIAlignment = Align(1);
+    StructPrefAlignment = Align(1);
+  }
+
+  return Error::success();
+}
+
 Error DataLayout::parsePrimitiveSpec(StringRef Spec) {
   // [ifv]<size>:<abi>[:<pref>]
   SmallVector<StringRef, 3> Components;
@@ -349,7 +379,7 @@ Error DataLayout::parsePrimitiveSpec(StringRef Spec) {
 
   // ABI alignment.
   Align ABIAlign;
-  if (Error Err = parseAlignment(Components[1], ABIAlign, "ABI"))
+  if (Error Err = parseAlignment(Components[1], ABIAlign, "ABI", ByteWidth))
     return Err;
 
   if (Specifier == 'i' && BitWidth == 8 && ABIAlign != 1)
@@ -358,7 +388,8 @@ Error DataLayout::parsePrimitiveSpec(StringRef Spec) {
   // Preferred alignment. Optional, defaults to the ABI alignment.
   Align PrefAlign = ABIAlign;
   if (Components.size() > 2)
-    if (Error Err = parseAlignment(Components[2], PrefAlign, "preferred"))
+    if (Error Err =
+            parseAlignment(Components[2], PrefAlign, "preferred", ByteWidth))
       return Err;
 
   if (PrefAlign < ABIAlign)
@@ -389,14 +420,15 @@ Error DataLayout::parseAggregateSpec(StringRef Spec) {
 
   // ABI alignment. Required. Can be zero, meaning use one byte alignment.
   Align ABIAlign;
-  if (Error Err =
-          parseAlignment(Components[1], ABIAlign, "ABI", /*AllowZero=*/true))
+  if (Error Err = parseAlignment(Components[1], ABIAlign, "ABI", ByteWidth,
+                                 /*AllowZero=*/true))
     return Err;
 
   // Preferred alignment. Optional, defaults to the ABI alignment.
   Align PrefAlign = ABIAlign;
   if (Components.size() > 2)
-    if (Error Err = parseAlignment(Components[2], PrefAlign, "preferred"))
+    if (Error Err =
+            parseAlignment(Components[2], PrefAlign, "preferred", ByteWidth))
       return Err;
 
   if (PrefAlign < ABIAlign)
@@ -430,14 +462,15 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
 
   // ABI alignment. Required, cannot be zero.
   Align ABIAlign;
-  if (Error Err = parseAlignment(Components[2], ABIAlign, "ABI"))
+  if (Error Err = parseAlignment(Components[2], ABIAlign, "ABI", ByteWidth))
     return Err;
 
   // Preferred alignment. Optional, defaults to the ABI alignment.
   // Cannot be zero.
   Align PrefAlign = ABIAlign;
   if (Components.size() > 3)
-    if (Error Err = parseAlignment(Components[3], PrefAlign, "preferred"))
+    if (Error Err =
+            parseAlignment(Components[3], PrefAlign, "preferred", ByteWidth))
       return Err;
 
   if (PrefAlign < ABIAlign)
@@ -521,7 +554,7 @@ Error DataLayout::parseSpecification(
     if (Rest.empty())
       return createSpecFormatError("S<size>");
     Align Alignment;
-    if (Error Err = parseAlignment(Rest, Alignment, "stack natural"))
+    if (Error Err = parseAlignment(Rest, Alignment, "stack natural", ByteWidth))
       return Err;
     StackNaturalAlign = Alignment;
     break;
@@ -544,7 +577,7 @@ Error DataLayout::parseSpecification(
                                Twine(Type) + "'");
     }
     Align Alignment;
-    if (Error Err = parseAlignment(Rest, Alignment, "ABI"))
+    if (Error Err = parseAlignment(Rest, Alignment, "ABI", ByteWidth))
       return Err;
     FunctionPtrAlign = Alignment;
     break;
@@ -616,10 +649,25 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
 
   // Split the data layout string into specifications separated by '-' and
   // parse each specification individually, updating internal data structures.
-  SmallVector<unsigned, 8> NonIntegralAddressSpaces;
-  for (StringRef Spec : split(LayoutString, '-')) {
+  SmallVector<StringRef, 16> Specs;
+  LayoutString.split(Specs, '-');
+
+  // On the first pass, diagnose empty specifications and parse the byte
+  // specification if there is one. The latter is necessary because other
+  // specifications may need the byte width for validation and to convert
+  // bit alignments to byte alignments.
+  for (StringRef Spec : Specs) {
     if (Spec.empty())
       return createStringError("empty specification is not allowed");
+    if (Spec.front() == 'b')
+      if (Error Err = parseByteSpec(Spec))
+        return Err;
+  }
+
+  SmallVector<unsigned, 8> NonIntegralAddressSpaces;
+  for (StringRef Spec : split(LayoutString, '-')) {
+    if (Spec.front() == 'b')
+      continue;
     if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces))
       return Err;
   }
@@ -667,6 +715,7 @@ void DataLayout::setPrimitiveSpec(char Specifier, uint32_t BitWidth,
 
 const DataLayout::PointerSpec &
 DataLayout::getPointerSpec(uint32_t AddrSpace) const {
+  assert(!PointerSpecs.empty() && "No pointer specs are defined");
   if (AddrSpace != 0) {
     auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
     if (I != PointerSpecs.end() && I->AddrSpace == AddrSpace)
@@ -737,14 +786,13 @@ Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
 }
 
 unsigned DataLayout::getPointerSize(unsigned AS) const {
-  return divideCeil(getPointerSpec(AS).BitWidth, 8);
+  return divideCeil(getPointerSpec(AS).BitWidth, ByteWidth);
 }
 
 unsigned DataLayout::getMaxIndexSize() const {
   unsigned MaxIndexSize = 0;
   for (const PointerSpec &Spec : PointerSpecs)
-    MaxIndexSize =
-        std::max(MaxIndexSize, (unsigned)divideCeil(Spec.BitWidth, 8));
+    MaxIndexSize = std::max(MaxIndexSize, divideCeil(Spec.BitWidth, ByteWidth));
 
   return MaxIndexSize;
 }
@@ -757,7 +805,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
 }
 
 unsigned DataLayout::getIndexSize(unsigned AS) const {
-  return divideCeil(getPointerSpec(AS).IndexBitWidth, 8);
+  return divideCeil(getPointerSpec(AS).IndexBitWidth, ByteWidth);
 }
 
 unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const {
@@ -821,7 +869,7 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
     // approximation of reality, and if the user wanted something less
     // less conservative, they should have specified it explicitly in the data
     // layout.
-    return Align(PowerOf2Ceil(BitWidth / 8));
+    return Align(PowerOf2Ceil(BitWidth / ByteWidth));
   }
   case Type::FixedVectorTyID:
   case Type::ScalableVectorTyID: {



More information about the llvm-commits mailing list