[llvm] [llvm] Move data layout string computation to TargetParser (PR #157612)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 04:54:22 PDT 2025
================
@@ -0,0 +1,631 @@
+//===--- TargetDataLayout.cpp - Map Triple to LLVM data layout string -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+#include "llvm/TargetParser/Triple.h"
+#include <cstring>
+using namespace llvm;
+
+/// Return the "-m:<kind>" symbol-mangling component of the data layout string
+/// for triple \p T, chosen from its object format (and, for COFF, from the OS
+/// and architecture as well).
+static const char *getManglingComponent(const Triple &T) {
+  if (T.isOSBinFormatGOFF())
+    return "-m:l";
+
+  if (T.isOSBinFormatMachO())
+    return "-m:o";
+
+  // COFF-specific mangling is only selected on Windows and UEFI; Triple::x86
+  // gets its own variant.
+  const bool IsWindowsStyleCOFF =
+      (T.isOSWindows() || T.isUEFI()) && T.isOSBinFormatCOFF();
+  if (IsWindowsStyleCOFF) {
+    if (T.getArch() == Triple::x86)
+      return "-m:x";
+    return "-m:w";
+  }
+
+  if (T.isOSBinFormatXCOFF())
+    return "-m:a";
+
+  // Everything else falls back to "-m:e".
+  return "-m:e";
+}
+
+/// Build the data layout string for 32-bit ARM targets.
+///
+/// \param TT      Target triple; supplies the endianness and mangling mode.
+/// \param ABIName ABI name forwarded to ARM::computeTargetABI to select the
+///                ABI that drives the alignment components.
+/// \returns the assembled data layout string.
+static std::string computeARMDataLayout(const Triple &TT, StringRef ABIName) {
+  const auto TargetABI = ARM::computeTargetABI(TT, ABIName);
+  const bool IsAPCS = TargetABI == ARM::ARM_ABI_APCS;
+  const bool IsAAPCS16 = TargetABI == ARM::ARM_ABI_AAPCS16;
+
+  // Endianness marker: "e" for little endian, "E" for big endian.
+  std::string Layout = TT.isLittleEndian() ? "e" : "E";
+
+  Layout += getManglingComponent(TT);
+
+  // Pointers are 32 bits wide and 32-bit aligned.
+  Layout += "-p:32:32";
+
+  // Function pointers are aligned to 8 bits only, because the LSB stores the
+  // ARM/Thumb state.
+  Layout += "-Fi8";
+
+  if (IsAPCS) {
+    // APCS requires only 32-bit alignment for 64-bit floats and for 64/128-bit
+    // vectors; we still try to give them their natural alignment.
+    Layout += "-f64:32:64";
+    Layout += "-v64:32:64-v128:32:128";
+  } else {
+    // ABIs other than APCS have 64-bit integers with natural alignment.
+    Layout += "-i64:64";
+
+    // 128-bit vectors are ABI-aligned to 64 bits (natural alignment
+    // preferred). No vector component is emitted for AAPCS16.
+    if (!IsAAPCS16)
+      Layout += "-v128:64:128";
+  }
+
+  // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+  // particular hardware support on 32-bit ARM).
+  Layout += "-a:0:32";
+
+  // Integer registers are 32 bits wide.
+  Layout += "-n32";
+
+  // Stack alignment: 128 bits on AAPCS16, 64 bits on AAPCS, 32 bits for every
+  // other ABI.
+  if (IsAAPCS16)
+    Layout += "-S128";
+  else if (TargetABI == ARM::ARM_ABI_AAPCS)
+    Layout += "-S64";
+  else
+    Layout += "-S32";
+
+  return Layout;
+}
+
+/// Build the data layout string for AArch64 targets.
+///
+/// Mach-O and COFF triples map to fixed little-endian strings; every other
+/// triple gets a string assembled from its endianness and, for the GNUILP32
+/// environment, a 32-bit default pointer component.
+static std::string computeAArch64DataLayout(const Triple &TT) {
+  if (TT.isOSBinFormatMachO())
+    // aarch64_32 additionally declares 32-bit default pointers.
+    return TT.getArch() == Triple::aarch64_32
+               ? "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-"
+                 "n32:64-S128-Fn32"
+               : "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-"
+                 "Fn32";
+
+  if (TT.isOSBinFormatCOFF())
+    return "e-m:w-p270:32:32-p271:32:32-p272:64:64-p:64:64-i32:32-i64:64-i128:"
+           "128-n32:64-S128-Fn32";
+
+  // Remaining object formats: start from the endianness marker and "-m:e".
+  std::string Layout = TT.isLittleEndian() ? "e" : "E";
+  Layout += "-m:e";
+
+  // The GNUILP32 environment uses 32-bit pointers.
+  if (TT.getEnvironment() == Triple::GNUILP32)
+    Layout += "-p:32:32";
+
+  Layout += "-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-"
+            "n32:64-S128-Fn32";
+  return Layout;
+}
+
+/// Build the data layout string for BPF targets. The strings for the two
+/// endiannesses differ only in the leading marker: "E" for Triple::bpfeb and
+/// "e" for every other arch.
+static std::string computeBPFDataLayout(const Triple &TT) {
+  const char *EndianMarker = TT.getArch() == Triple::bpfeb ? "E" : "e";
+  return std::string(EndianMarker) +
+         "-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+}
+
+/// Build the data layout string for CSKY targets.
+///
+/// The result does not depend on \p TT: only little endian is supported for
+/// now, so the string is a constant.
+/// TODO: Add support for big endian.
+static std::string computeCSKYDataLayout(const Triple &TT) {
+  // CSKY is always a 32-bit target, with the CSKYv2 ABI preferred for now:
+  // little endian ("e"), ELF mangling only, and a 4-byte aligned stack.
+  return "e"
+         "-m:e-S32-p:32:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:32"
+         "-v128:32:32-a:0:32-Fi32-n32";
+}
+
+/// Build the data layout string for LoongArch targets: one fixed string for
+/// 64-bit triples and another for 32-bit triples. Any other triple trips the
+/// assertion in asserts-enabled builds.
+static std::string computeLoongArchDataLayout(const Triple &TT) {
+  if (!TT.isArch64Bit()) {
+    assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
+    return "e-m:e-p:32:32-i64:64-n32-S128";
+  }
+
+  return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+}
+
+/// Build the data layout string for M68k targets. The result is a constant;
+/// \p TT is not consulted.
+static std::string computeM68kDataLayout(const Triple &TT) {
+  // FIXME no floats at the moment
+  return
+      // M68k is big endian.
+      "E"
+      // FIXME how to wire it with the used object format?
+      "-m:e"
+      // Pointers are always 32 bits wide, even for 16-bit CPUs. The ABI only
+      // specifies 16-bit alignment, but on at least the 68020+ with a 32-bit
+      // bus there is a performance benefit to having 32-bit alignment.
+      "-p:32:16:32"
+      // Bytes need no special alignment, words are word aligned and long
+      // words are word aligned at minimum.
+      "-i8:8:8-i16:16:16-i32:16:32"
+      // The registers can hold 8, 16 and 32 bits.
+      "-n8:16:32"
+      // Aggregate alignment and stack alignment.
+      "-a:0:16-S16";
+}
+
+namespace {
+// File-local enumeration of the MIPS ABI kinds (O32, N32, N64), plus an
+// Unknown value.
+enum class MipsABI { Unknown, O32, N32, N64 };
+} // namespace
+
+// FIXME: This duplicates MipsABIInfo::computeTargetABI, but duplicating this is
----------------
s-barannikov wrote:
Or we should delete this method. Pointers in different address spaces can have different sizes, so pointer sizes should be queried via DataLayout. There are only two uses of this method in the codebase (not counting transitive uses and uses in tests).
https://github.com/llvm/llvm-project/pull/157612
More information about the llvm-commits
mailing list