[llvm] [llvm] Move data layout string computation to TargetParser (PR #157612)
Jessica Clarke via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 07:02:08 PDT 2025
================
@@ -0,0 +1,631 @@
+//===--- TargetDataLayout.cpp - Map Triple to LLVM data layout string -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+#include "llvm/TargetParser/Triple.h"
+#include <cstring>
+using namespace llvm;
+
+static const char *getManglingComponent(const Triple &T) {
+ if (T.isOSBinFormatGOFF())
+ return "-m:l";
+ if (T.isOSBinFormatMachO())
+ return "-m:o";
+ if ((T.isOSWindows() || T.isUEFI()) && T.isOSBinFormatCOFF())
+ return T.getArch() == Triple::x86 ? "-m:x" : "-m:w";
+ if (T.isOSBinFormatXCOFF())
+ return "-m:a";
+ return "-m:e";
+}
+
+static std::string computeARMDataLayout(const Triple &TT, StringRef ABIName) {
+ auto ABI = ARM::computeTargetABI(TT, ABIName);
+ std::string Ret;
+
+ if (TT.isLittleEndian())
+ // Little endian.
+ Ret += "e";
+ else
+ // Big endian.
+ Ret += "E";
+
+ Ret += getManglingComponent(TT);
+
+ // Pointers are 32 bits and aligned to 32 bits.
+ Ret += "-p:32:32";
+
+ // Function pointers are aligned to 8 bits (because the LSB stores the
+ // ARM/Thumb state).
+ Ret += "-Fi8";
+
+ // ABIs other than APCS have 64 bit integers with natural alignment.
+ if (ABI != ARM::ARM_ABI_APCS)
+ Ret += "-i64:64";
+
+ // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
+ // bits, others to 64 bits. We always try to align to 64 bits.
+ if (ABI == ARM::ARM_ABI_APCS)
+ Ret += "-f64:32:64";
+
+ // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
+ // to 64. We always ty to give them natural alignment.
+ if (ABI == ARM::ARM_ABI_APCS)
+ Ret += "-v64:32:64-v128:32:128";
+ else if (ABI != ARM::ARM_ABI_AAPCS16)
+ Ret += "-v128:64:128";
+
+ // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+ // particular hardware support on 32-bit ARM).
+ Ret += "-a:0:32";
+
+ // Integer registers are 32 bits.
+ Ret += "-n32";
+
+ // The stack is 64 bit aligned on AAPCS and 32 bit aligned everywhere else.
+ if (ABI == ARM::ARM_ABI_AAPCS16)
+ Ret += "-S128";
+ else if (ABI == ARM::ARM_ABI_AAPCS)
+ Ret += "-S64";
+ else
+ Ret += "-S32";
+
+ return Ret;
+}
+
+// Helper function to build a DataLayout string
+static std::string computeAArch64DataLayout(const Triple &TT) {
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getArch() == Triple::aarch64_32)
+ return "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-"
+ "n32:64-S128-Fn32";
+ return "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-"
+ "Fn32";
+ }
+ if (TT.isOSBinFormatCOFF())
+ return "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:"
+ "128-n32:64-S128-Fn32";
+ std::string Endian = TT.isLittleEndian() ? "e" : "E";
+ std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
+ return Endian + "-m:e" + Ptr32 +
+ "-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-"
+ "n32:64-S128-Fn32";
+}
+
+// DataLayout: little or big endian
+static std::string computeBPFDataLayout(const Triple &TT) {
+ if (TT.getArch() == Triple::bpfeb)
+ return "E-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+ else
+ return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+}
+
+static std::string computeCSKYDataLayout(const Triple &TT) {
+ std::string Ret;
+
+ // Only support little endian for now.
+ // TODO: Add support for big endian.
+ Ret += "e";
+
+ // CSKY is always 32-bit target with the CSKYv2 ABI as prefer now.
+ // It's a 4-byte aligned stack with ELF mangling only.
+ Ret += "-m:e-S32-p:32:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:32"
+ "-v128:32:32-a:0:32-Fi32-n32";
+
+ return Ret;
+}
+
+static std::string computeLoongArchDataLayout(const Triple &TT) {
+ if (TT.isArch64Bit())
+ return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+ assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
+ return "e-m:e-p:32:32-i64:64-n32-S128";
+}
+
+static std::string computeM68kDataLayout(const Triple &TT) {
+ std::string Ret = "";
+ // M68k is Big Endian
+ Ret += "E";
+
+ // FIXME how to wire it with the used object format?
+ Ret += "-m:e";
+
+ // M68k pointers are always 32 bit wide even for 16-bit CPUs.
+ // The ABI only specifies 16-bit alignment.
+ // On at least the 68020+ with a 32-bit bus, there is a performance benefit
+ // to having 32-bit alignment.
+ Ret += "-p:32:16:32";
+
+ // Bytes do not require special alignment, words are word aligned and
+ // long words are word aligned at minimum.
+ Ret += "-i8:8:8-i16:16:16-i32:16:32";
+
+ // FIXME no floats at the moment
+
+ // The registers can hold 8, 16, 32 bits
+ Ret += "-n8:16:32";
+
+ Ret += "-a:0:16-S16";
+
+ return Ret;
+}
+
+namespace {
+enum class MipsABI { Unknown, O32, N32, N64 };
+}
+
+// FIXME: This duplicates MipsABIInfo::computeTargetABI, but duplicating this is
----------------
jrtc27 wrote:
Without looking at the specifics of these use cases, this would risk being incompatible with architectures that use more interesting pointer encodings than "machine word", such as CHERI.
https://github.com/llvm/llvm-project/pull/157612
More information about the llvm-commits
mailing list