[llvm] [llvm] Move data layout string computation to TargetParser (PR #157612)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 8 22:48:08 PDT 2025
================
@@ -0,0 +1,631 @@
+//===--- TargetDataLayout.cpp - Map Triple to LLVM data layout string -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+#include "llvm/TargetParser/Triple.h"
+#include <cstring>
+using namespace llvm;
+
+// Returns the "-m:<x>" symbol-mangling component of the data layout string
+// for triple \p T. The object format is checked first (GOFF, Mach-O), then
+// COFF on Windows/UEFI, then XCOFF; "-m:e" is the fallback for everything
+// else.
+static const char *getManglingComponent(const Triple &T) {
+  if (T.isOSBinFormatGOFF())
+    return "-m:l";
+  if (T.isOSBinFormatMachO())
+    return "-m:o";
+
+  // COFF on Windows or UEFI: Triple::x86 gets "-m:x", all other
+  // architectures get "-m:w".
+  const bool IsWindowsOrUEFI = T.isOSWindows() || T.isUEFI();
+  if (IsWindowsOrUEFI && T.isOSBinFormatCOFF()) {
+    if (T.getArch() == Triple::x86)
+      return "-m:x";
+    return "-m:w";
+  }
+
+  return T.isOSBinFormatXCOFF() ? "-m:a" : "-m:e";
+}
+
+// Builds the data layout string for ARM targets. The ABI is computed from
+// \p TT and \p ABIName via ARM::computeTargetABI; it selects the integer,
+// float and vector alignment entries as well as the stack alignment.
+static std::string computeARMDataLayout(const Triple &TT, StringRef ABIName) {
+  const auto ABI = ARM::computeTargetABI(TT, ABIName);
+  const bool IsAPCS = ABI == ARM::ARM_ABI_APCS;
+
+  // Endianness marker: "e" for little endian, "E" for big endian.
+  std::string Layout = TT.isLittleEndian() ? "e" : "E";
+
+  Layout += getManglingComponent(TT);
+
+  // Pointers are 32 bits wide and 32-bit aligned. Function pointers are only
+  // aligned to 8 bits because the LSB stores the ARM/Thumb state.
+  Layout += "-p:32:32";
+  Layout += "-Fi8";
+
+  if (IsAPCS) {
+    // APCS emits no i64 entry. It requires 64-bit floats and 64/128-bit
+    // vectors to be aligned to only 32 bits; the preferred alignment is still
+    // the natural one.
+    Layout += "-f64:32:64";
+    Layout += "-v64:32:64-v128:32:128";
+  } else {
+    // Every other ABI has 64-bit integers with natural alignment.
+    Layout += "-i64:64";
+
+    // 128-bit vectors are ABI-aligned to 64 bits with natural alignment
+    // preferred. AAPCS16 adds no vector entry.
+    if (ABI != ARM::ARM_ABI_AAPCS16)
+      Layout += "-v128:64:128";
+  }
+
+  // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+  // particular hardware support on 32-bit ARM).
+  Layout += "-a:0:32";
+
+  // Integer registers are 32 bits.
+  Layout += "-n32";
+
+  // Stack alignment: 128 bits on AAPCS16, 64 bits on AAPCS and 32 bits
+  // everywhere else.
+  const char *StackAlign = "-S32";
+  if (ABI == ARM::ARM_ABI_AAPCS16)
+    StackAlign = "-S128";
+  else if (ABI == ARM::ARM_ABI_AAPCS)
+    StackAlign = "-S64";
+  Layout += StackAlign;
+
+  return Layout;
+}
+
+// Builds the AArch64 data layout string for \p TT. Mach-O and COFF targets
+// use fixed little-endian layouts; every other object format gets a "-m:e"
+// layout whose endianness follows the triple and which declares 32-bit
+// pointers for the GNUILP32 environment.
+static std::string computeAArch64DataLayout(const Triple &TT) {
+  if (TT.isOSBinFormatMachO()) {
+    std::string MachOLayout = "e-m:o";
+    // Only aarch64_32 adds an explicit 32-bit default pointer entry.
+    if (TT.getArch() == Triple::aarch64_32)
+      MachOLayout += "-p:32:32";
+    MachOLayout += "-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128"
+                   "-n32:64-S128-Fn32";
+    return MachOLayout;
+  }
+
+  if (TT.isOSBinFormatCOFF())
+    return "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64"
+           "-i128:128-n32:64-S128-Fn32";
+
+  std::string Layout = TT.isLittleEndian() ? "e" : "E";
+  Layout += "-m:e";
+  if (TT.getEnvironment() == Triple::GNUILP32)
+    Layout += "-p:32:32";
+  Layout += "-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64"
+            "-i128:128-n32:64-S128-Fn32";
+  return Layout;
+}
+
+// Returns the BPF data layout string. Only the endianness marker varies: "E"
+// when the architecture is Triple::bpfeb, "e" for any other architecture.
+static std::string computeBPFDataLayout(const Triple &TT) {
+  const bool IsBigEndian = TT.getArch() == Triple::bpfeb;
+  std::string Layout = IsBigEndian ? "E" : "e";
+  Layout += "-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+  return Layout;
+}
+
+// Returns the CSKY data layout string. The result does not depend on \p TT.
+static std::string computeCSKYDataLayout(const Triple &TT) {
+  // Only little endian ("e") is supported for now.
+  // TODO: Add support for big endian.
+  //
+  // CSKY is always a 32-bit target, with the CSKYv2 ABI preferred for now:
+  // a 4-byte aligned stack with ELF mangling only.
+  return "e-m:e-S32-p:32:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32"
+         "-v64:32:32-v128:32:32-a:0:32-Fi32-n32";
+}
+
+// Returns the LoongArch data layout string: the LA64 layout when \p TT is a
+// 64-bit architecture, otherwise the LA32 layout. Asserts that a non-64-bit
+// triple is 32-bit.
+static std::string computeLoongArchDataLayout(const Triple &TT) {
+  if (!TT.isArch64Bit()) {
+    assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
+    return "e-m:e-p:32:32-i64:64-n32-S128";
+  }
+  return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+}
+
+// Returns the M68k data layout string, assembled from a fixed list of
+// components. The result does not depend on \p TT.
+static std::string computeM68kDataLayout(const Triple &TT) {
+  static const char *const Components[] = {
+      // M68k is Big Endian.
+      "E",
+
+      // FIXME how to wire it with the used object format?
+      "-m:e",
+
+      // M68k pointers are always 32 bit wide even for 16-bit CPUs. The ABI
+      // only specifies 16-bit alignment, but on at least the 68020+ with a
+      // 32-bit bus there is a performance benefit to having 32-bit alignment.
+      "-p:32:16:32",
+
+      // Bytes do not require special alignment, words are word aligned and
+      // long words are word aligned at minimum.
+      "-i8:8:8-i16:16:16-i32:16:32",
+
+      // FIXME no floats at the moment
+
+      // The registers can hold 8, 16, 32 bits.
+      "-n8:16:32",
+
+      "-a:0:16-S16",
+  };
+
+  std::string Layout;
+  for (const char *Component : Components)
+    Layout += Component;
+  return Layout;
+}
+
+namespace {
+// ABI selector returned by getMipsABI.
+enum class MipsABI {
+  Unknown,
+  O32,
+  N32,
+  N64,
+};
+} // namespace
+
+// Determines the MIPS ABI from an explicit ABI name or, failing that, from
+// the triple. A name starting with "o32", "n32" or "n64" wins; otherwise an
+// N32 triple selects N32, a MIPS64 triple selects N64 and anything else O32.
+// Asserts that an ABI name not matching any known prefix is empty (unless the
+// triple already selected N32).
+//
+// FIXME: This duplicates MipsABIInfo::computeTargetABI, but duplicating this is
+// preferable to violating layering rules. Ideally that information should live
+// in LLVM TargetParser, but for now we just duplicate some ABI name string
+// logic for simplicity.
+static MipsABI getMipsABI(const Triple &TT, StringRef ABIName) {
+  static const struct {
+    const char *Prefix;
+    MipsABI ABI;
+  } NamedABIs[] = {
+      {"o32", MipsABI::O32},
+      {"n32", MipsABI::N32},
+      {"n64", MipsABI::N64},
+  };
+
+  // An explicitly named ABI takes precedence over the triple.
+  for (const auto &Entry : NamedABIs)
+    if (ABIName.starts_with(Entry.Prefix))
+      return Entry.ABI;
+
+  if (TT.isABIN32())
+    return MipsABI::N32;
+  assert(ABIName.empty() && "Unknown ABI option for MIPS");
+
+  return TT.isMIPS64() ? MipsABI::N64 : MipsABI::O32;
+}
+
+// Builds the MIPS data layout string for \p TT, using the ABI that getMipsABI
+// derives from \p TT and \p ABIName.
+static std::string computeMipsDataLayout(const Triple &TT, StringRef ABIName) {
+  const MipsABI ABI = getMipsABI(TT, ABIName);
+
+  // There are both little and big endian mips.
+  std::string Layout = TT.isLittleEndian() ? "e" : "E";
+
+  // O32 uses its own mangling mode; the other ABIs use "-m:e".
+  Layout += (ABI == MipsABI::O32) ? "-m:m" : "-m:e";
+
+  // Every ABI except N64 has 32-bit pointers.
+  if (ABI != MipsABI::N64)
+    Layout += "-p:32:32";
+
+  // 8 and 16 bit integers only need to have natural alignment, but try to
+  // align them to 32 bits. 64 bit integers have natural alignment.
+  Layout += "-i8:8:32-i16:16:32-i64:64";
+
+  // 32 bit registers are always available and the stack is at least 64 bit
+  // aligned. On N64 and N32, 64 bit registers are also available, i128 is
+  // 128 bit aligned and the stack is 128 bit aligned.
+  const bool Has64BitRegs = ABI == MipsABI::N64 || ABI == MipsABI::N32;
+  Layout += Has64BitRegs ? "-i128:128-n32:64-S128" : "-n32-S64";
+
+  return Layout;
+}
+
+// Builds the PowerPC data layout string for triple \p T. Pointer width,
+// function pointer alignment, native integer widths and the explicit
+// stack/vector alignment entries depend on the architecture, OS and ABI.
+static std::string computePowerDataLayout(const Triple &T) {
+  const auto Arch = T.getArch();
+  const bool Is64Bit = Arch == Triple::ppc64 || Arch == Triple::ppc64le;
+
+  // Most PPC* platforms are big endian, PPC(64)LE is little endian.
+  std::string Layout = T.isLittleEndian() ? "e" : "E";
+
+  Layout += getManglingComponent(T);
+
+  // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+  // pointers.
+  if (!Is64Bit || T.getOS() == Triple::Lv2)
+    Layout += "-p:32:32";
+
+  // If the target ABI uses function descriptors, then the alignment of function
+  // pointers depends on the alignment used to emit the descriptor. Otherwise,
+  // function pointers are aligned to 32 bits because the instructions must be.
+  const char *FnPtrAlign = "-Fn32";
+  if (Arch == Triple::ppc64 && !T.isPPC64ELFv2ABI())
+    FnPtrAlign = "-Fi64";
+  else if (T.isOSAIX())
+    FnPtrAlign = Is64Bit ? "-Fi64" : "-Fi32";
+  Layout += FnPtrAlign;
+
+  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+  // documentation are wrong; these are correct (i.e. "what gcc does").
+  Layout += "-i64:64";
+
+  // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+  Layout += Is64Bit ? "-i128:128-n32:64" : "-n32";
+
+  // Specify the vector alignment explicitly. For v256i1 and v512i1, the
+  // calculated alignment would be 256*alignment(i1) and 512*alignment(i1),
+  // which is 256 and 512 bytes - way over aligned.
+  const bool IsAIXOrLinux = T.isOSAIX() || T.isOSLinux();
+  if (Is64Bit && IsAIXOrLinux)
+    Layout += "-S128-v256:256:256-v512:512:512";
+
+  return Layout;
+}
+
+// Returns the data layout string for AMD GPU targets: the r600 layout when
+// the architecture is Triple::r600, otherwise the layout with per-address-space
+// pointer entries. Both share the same integer/vector/stack suffix.
+static std::string computeAMDDataLayout(const Triple &TT) {
+  const bool IsR600 = TT.getArch() == Triple::r600;
+
+  // r600: 32-bit pointers.
+  //
+  // Otherwise: 32-bit private, local, and region pointers. 64-bit global,
+  // constant and flat. 160-bit non-integral fat buffer pointers that include
+  // a 128-bit buffer descriptor and a 32-bit offset, which are indexed by
+  // 32-bit values (address space 7), and 128-bit non-integral buffer
+  // resources (address space 8) which cannot be non-trivially accessed by
+  // LLVM memory operations like getelementptr.
+  std::string Layout =
+      IsR600 ? "e-p:32:32"
+             : "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32"
+               "-p6:32:32-p7:160:256:256:32-p8:128:128:128:48"
+               "-p9:192:256:256:32";
+
+  // Integer, vector, native-width, stack and address-space entries common to
+  // both layouts.
+  Layout += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+            "-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
+
+  // Only the non-r600 layout has a non-integral address space entry.
+  if (!IsR600)
+    Layout += "-ni:7:8:9";
+
+  return Layout;
+}
+
+static std::string computeRISCVDataLayout(const Triple &TT, StringRef ABIName) {
+ std::string Ret;
+
+ if (TT.isLittleEndian())
+ Ret += "e";
+ else
+ Ret += "E";
+
+ Ret += "-m:e";
+
+ // Pointer and integer sizes.
+ if (TT.isArch64Bit()) {
----------------
arsenm wrote:
```suggestion
if (TT.isRISCV64()) {
```
https://github.com/llvm/llvm-project/pull/157612
More information about the llvm-commits
mailing list