[llvm] [llvm] Move data layout string computation to TargetParser (PR #157612)

Mon Sep 8 22:48:08 PDT 2025

================
@@ -0,0 +1,631 @@
+//===--- TargetDataLayout.cpp - Map Triple to LLVM data layout string -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+#include "llvm/TargetParser/Triple.h"
+#include <cstring>
+using namespace llvm;
+
+static const char *getManglingComponent(const Triple &T) {
+  if (T.isOSBinFormatGOFF())
+    return "-m:l";
+  if (T.isOSBinFormatMachO())
+    return "-m:o";
+  if ((T.isOSWindows() || T.isUEFI()) && T.isOSBinFormatCOFF())
+    return T.getArch() == Triple::x86 ? "-m:x" : "-m:w";
+  if (T.isOSBinFormatXCOFF())
+    return "-m:a";
+  return "-m:e";
+}
+
+static std::string computeARMDataLayout(const Triple &TT, StringRef ABIName) {
+  auto ABI = ARM::computeTargetABI(TT, ABIName);
+  std::string Ret;
+
+  if (TT.isLittleEndian())
+    // Little endian.
+    Ret += "e";
+  else
+    // Big endian.
+    Ret += "E";
+
+  Ret += getManglingComponent(TT);
+
+  // Pointers are 32 bits and aligned to 32 bits.
+  Ret += "-p:32:32";
+
+  // Function pointers are aligned to 8 bits (because the LSB stores the
+  // ARM/Thumb state).
+  Ret += "-Fi8";
+
+  // ABIs other than APCS have 64 bit integers with natural alignment.
+  if (ABI != ARM::ARM_ABI_APCS)
+    Ret += "-i64:64";
+
+  // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
+  // bits, others to 64 bits. We always try to align to 64 bits.
+  if (ABI == ARM::ARM_ABI_APCS)
+    Ret += "-f64:32:64";
+
+  // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
+  // to 64. We always ty to give them natural alignment.
+  if (ABI == ARM::ARM_ABI_APCS)
+    Ret += "-v64:32:64-v128:32:128";
+  else if (ABI != ARM::ARM_ABI_AAPCS16)
+    Ret += "-v128:64:128";
+
+  // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+  // particular hardware support on 32-bit ARM).
+  Ret += "-a:0:32";
+
+  // Integer registers are 32 bits.
+  Ret += "-n32";
+
+  // The stack is 64 bit aligned on AAPCS and 32 bit aligned everywhere else.
+  if (ABI == ARM::ARM_ABI_AAPCS16)
+    Ret += "-S128";
+  else if (ABI == ARM::ARM_ABI_AAPCS)
+    Ret += "-S64";
+  else
+    Ret += "-S32";
+
+  return Ret;
+}
+
+// Helper function to build a DataLayout string
+static std::string computeAArch64DataLayout(const Triple &TT) {
+  if (TT.isOSBinFormatMachO()) {
+    if (TT.getArch() == Triple::aarch64_32)
+      return "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-"
+             "n32:64-S128-Fn32";
+    return "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-"
+           "Fn32";
+  }
+  if (TT.isOSBinFormatCOFF())
+    return "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:"
+           "128-n32:64-S128-Fn32";
+  std::string Endian = TT.isLittleEndian() ? "e" : "E";
+  std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
+  return Endian + "-m:e" + Ptr32 +
+         "-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-"
+         "n32:64-S128-Fn32";
+}
+
+// DataLayout: little or big endian
+static std::string computeBPFDataLayout(const Triple &TT) {
+  if (TT.getArch() == Triple::bpfeb)
+    return "E-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+  else
+    return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+}
+
+static std::string computeCSKYDataLayout(const Triple &TT) {
+  std::string Ret;
+
+  // Only support little endian for now.
+  // TODO: Add support for big endian.
+  Ret += "e";
+
+  // CSKY is always 32-bit target with the CSKYv2 ABI as prefer now.
+  // It's a 4-byte aligned stack with ELF mangling only.
+  Ret += "-m:e-S32-p:32:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:32"
+         "-v128:32:32-a:0:32-Fi32-n32";
+
+  return Ret;
+}
+
+static std::string computeLoongArchDataLayout(const Triple &TT) {
+  if (TT.isArch64Bit())
+    return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+  assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
+  return "e-m:e-p:32:32-i64:64-n32-S128";
+}
+
+static std::string computeM68kDataLayout(const Triple &TT) {
+  std::string Ret = "";
+  // M68k is Big Endian
+  Ret += "E";
+
+  // FIXME how to wire it with the used object format?
+  Ret += "-m:e";
+
+  // M68k pointers are always 32 bit wide even for 16-bit CPUs.
+  // The ABI only specifies 16-bit alignment.
+  // On at least the 68020+ with a 32-bit bus, there is a performance benefit
+  // to having 32-bit alignment.
+  Ret += "-p:32:16:32";
+
+  // Bytes do not require special alignment, words are word aligned and
+  // long words are word aligned at minimum.
+  Ret += "-i8:8:8-i16:16:16-i32:16:32";
+
+  // FIXME no floats at the moment
+
+  // The registers can hold 8, 16, 32 bits
+  Ret += "-n8:16:32";
+
+  Ret += "-a:0:16-S16";
+
+  return Ret;
+}
+
+namespace {
+enum class MipsABI { Unknown, O32, N32, N64 };
+}
+
+// FIXME: This duplicates MipsABIInfo::computeTargetABI, but duplicating this is
+// preferable to violating layering rules. Ideally that information should live
+// in LLVM TargetParser, but for now we just duplicate some ABI name string
+// logic for simplicity.
+static MipsABI getMipsABI(const Triple &TT, StringRef ABIName) {
+  if (ABIName.starts_with("o32"))
+    return MipsABI::O32;
+  if (ABIName.starts_with("n32"))
+    return MipsABI::N32;
+  if (ABIName.starts_with("n64"))
+    return MipsABI::N64;
+  if (TT.isABIN32())
+    return MipsABI::N32;
+  assert(ABIName.empty() && "Unknown ABI option for MIPS");
+
+  if (TT.isMIPS64())
+    return MipsABI::N64;
+  return MipsABI::O32;
+}
+
+static std::string computeMipsDataLayout(const Triple &TT, StringRef ABIName) {
+  std::string Ret;
+  MipsABI ABI = getMipsABI(TT, ABIName);
+
+  // There are both little and big endian mips.
+  if (TT.isLittleEndian())
+    Ret += "e";
+  else
+    Ret += "E";
+
+  if (ABI == MipsABI::O32)
+    Ret += "-m:m";
+  else
+    Ret += "-m:e";
+
+  // Pointers are 32 bit on some ABIs.
+  if (ABI != MipsABI::N64)
+    Ret += "-p:32:32";
+
+  // 8 and 16 bit integers only need to have natural alignment, but try to
+  // align them to 32 bits. 64 bit integers have natural alignment.
+  Ret += "-i8:8:32-i16:16:32-i64:64";
+
+  // 32 bit registers are always available and the stack is at least 64 bit
+  // aligned. On N64 64 bit registers are also available and the stack is
+  // 128 bit aligned.
+  if (ABI == MipsABI::N64 || ABI == MipsABI::N32)
+    Ret += "-i128:128-n32:64-S128";
+  else
+    Ret += "-n32-S64";
+
+  return Ret;
+}
+
+static std::string computePowerDataLayout(const Triple &T) {
+  bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
+  std::string Ret;
+
+  // Most PPC* platforms are big endian, PPC(64)LE is little endian.
+  if (T.isLittleEndian())
+    Ret = "e";
+  else
+    Ret = "E";
+
+  Ret += getManglingComponent(T);
+
+  // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+  // pointers.
+  if (!is64Bit || T.getOS() == Triple::Lv2)
+    Ret += "-p:32:32";
+
+  // If the target ABI uses function descriptors, then the alignment of function
+  // pointers depends on the alignment used to emit the descriptor. Otherwise,
+  // function pointers are aligned to 32 bits because the instructions must be.
+  if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) {
+    Ret += "-Fi64";
+  } else if (T.isOSAIX()) {
+    Ret += is64Bit ? "-Fi64" : "-Fi32";
+  } else {
+    Ret += "-Fn32";
+  }
+
+  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+  // documentation are wrong; these are correct (i.e. "what gcc does").
+  Ret += "-i64:64";
+
+  // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+  if (is64Bit)
+    Ret += "-i128:128-n32:64";
+  else
+    Ret += "-n32";
+
+  // Specify the vector alignment explicitly. For v256i1 and v512i1, the
+  // calculated alignment would be 256*alignment(i1) and 512*alignment(i1),
+  // which is 256 and 512 bytes - way over aligned.
+  if (is64Bit && (T.isOSAIX() || T.isOSLinux()))
+    Ret += "-S128-v256:256:256-v512:512:512";
+
+  return Ret;
+}
+
+static std::string computeAMDDataLayout(const Triple &TT) {
+  if (TT.getArch() == Triple::r600) {
+    // 32-bit pointers.
+    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
+  }
+
+  // 32-bit private, local, and region pointers. 64-bit global, constant and
+  // flat. 160-bit non-integral fat buffer pointers that include a 128-bit
+  // buffer descriptor and a 32-bit offset, which are indexed by 32-bit values
+  // (address space 7), and 128-bit non-integral buffer resourcees (address
+  // space 8) which cannot be non-trivilally accessed by LLVM memory operations
+  // like getelementptr.
+  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
+         "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+         "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-"
+         "v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
+}
+
+static std::string computeRISCVDataLayout(const Triple &TT, StringRef ABIName) {
+  std::string Ret;
+
+  if (TT.isLittleEndian())
+    Ret += "e";
+  else
+    Ret += "E";
+
+  Ret += "-m:e";
+
+  // Pointer and integer sizes.
+  if (TT.isArch64Bit()) {
----------------
arsenm wrote:

```suggestion
  if (TT.isRISCV64()) {
```

https://github.com/llvm/llvm-project/pull/157612