[Mlir-commits] [clang] [lld] [llvm] [mlir] [IR] Introduce `T<address space>` to `DataLayout` to represent flat address space if a target supports it (PR #108786)

Mon Oct 14 12:30:31 PDT 2024

https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108786

>From c2be824d5935b57a87a7ccf618ee4eecf55b4e3a Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 17 Sep 2024 21:47:45 -0400
Subject: [PATCH] [IR] Introduce `T<address space>` to `DataLayout` to
 represent flat address space if a target supports it

---
 clang/lib/Basic/Targets/AMDGPU.cpp            |   2 +-
 clang/lib/Basic/Targets/NVPTX.cpp             |   8 +-
 clang/test/CodeGen/target-data.c              |   8 +-
 clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl |   2 +-
 lld/test/ELF/lto/amdgcn-oses.ll               |   6 +-
 lld/test/ELF/lto/amdgcn.ll                    |   2 +-
 llvm/docs/LangRef.rst                         |   6 +
 llvm/docs/ReleaseNotes.rst                    | 251 ++++++++++++++++++
 llvm/include/llvm/IR/DataLayout.h             |   4 +
 llvm/lib/IR/AutoUpgrade.cpp                   |  10 +
 llvm/lib/IR/DataLayout.cpp                    |  11 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   2 +-
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp  |   2 +-
 llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp  |   6 +-
 .../Bitcode/DataLayoutUpgradeTest.cpp         |  40 +--
 .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp      |   2 +-
 16 files changed, 322 insertions(+), 40 deletions(-)
 create mode 100644 llvm/docs/ReleaseNotes.rst

diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 3b748d0249d57b..9e322cef0d9c38 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -36,7 +36,7 @@ static const char *const DataLayoutStringAMDGCN =
     "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
     "32-v48:64-v96:128"
     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-    "-ni:7:8:9";
+    "-ni:7:8:9-U0";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
     llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 88a0dbde52d52b..b70d8d30732327 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -66,12 +66,12 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   HasFloat16 = true;
 
   if (TargetPointerWidth == 32)
-    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0");
   else if (Opts.NVPTXUseShortPointers)
-    resetDataLayout(
-        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+    resetDataLayout("e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:"
+                    "32-n16:32:64-U0");
   else
-    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0");
 
   // If possible, get a TargetInfo for our host triple, so we can match its
   // types.
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 8548aa00cfe877..129d6de4927c43 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -160,11 +160,11 @@
 
 // RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=NVPTX
-// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0"
 
 // RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=NVPTX64
-// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0"
 
 // RUN: %clang_cc1 -triple r600-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=R600
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-U0"
 
 // Test default -target-cpu
 // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \
 // RUN: | FileCheck %s -check-prefix=R600SIDefault
-// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-U0"
 
 // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=AARCH64
diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
index bb52f87615214b..32427b971aa525 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
 
-// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-U0"
 void foo(void) {}
diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index 7a74d0317f2b9e..5316ce6e3208f7 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -25,7 +25,7 @@
 
 ;--- amdhsa.ll
 target triple = "amdgcn-amd-amdhsa"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0"
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
@@ -36,7 +36,7 @@ define void @_start() {
 
 ;--- amdpal.ll
 target triple = "amdgcn-amd-amdpal"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0"
 
 define amdgpu_cs void @_start() {
   ret void
@@ -44,7 +44,7 @@ define amdgpu_cs void @_start() {
 
 ;--- mesa3d.ll
 target triple = "amdgcn-amd-mesa3d"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0"
 
 define void @_start() {
   ret void
diff --git a/lld/test/ELF/lto/amdgcn.ll b/lld/test/ELF/lto/amdgcn.ll
index 4281e209fd9789..5ef32976251cb8 100644
--- a/lld/test/ELF/lto/amdgcn.ll
+++ b/lld/test/ELF/lto/amdgcn.ll
@@ -5,7 +5,7 @@
 ; Make sure the amdgcn triple is handled
 
 target triple = "amdgcn-amd-amdhsa"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-U0"
 
 define void @_start() {
   ret void
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b08bd79147f7de..77b050bfb11a40 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3076,6 +3076,12 @@ as follows:
     address space 0, this property only affects the default value to be used
     when creating globals without additional contextual information (e.g. in
     LLVM passes).
+``U<address space>``
+    Specifies the undesirable address space for a target. This specification does
+    not define a new address space; it must correspond to a valid, existing address
+    space and be capable of lossless conversion to other address spaces. If this
+    specification is absent, it indicates that the target does not have such an
+    address space.
 
 .. _alloca_addrspace:
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
new file mode 100644
index 00000000000000..b3244437d9bae4
--- /dev/null
+++ b/llvm/docs/ReleaseNotes.rst
@@ -0,0 +1,251 @@
+============================
+LLVM |release| Release Notes
+============================
+
+.. contents::
+    :local:
+
+.. only:: PreRelease
+
+  .. warning::
+     These are in-progress notes for the upcoming LLVM |version| release.
+     Release notes for previous releases can be found on
+     `the Download Page <https://releases.llvm.org/download.html>`_.
+
+
+Introduction
+============
+
+This document contains the release notes for the LLVM Compiler Infrastructure,
+release |release|.  Here we describe the status of LLVM, including major improvements
+from the previous release, improvements in various subprojects of LLVM, and
+some of the current users of the code.  All LLVM releases may be downloaded
+from the `LLVM releases web site <https://llvm.org/releases/>`_.
+
+For more information about LLVM, including information about the latest
+release, please check out the `main LLVM web site <https://llvm.org/>`_.  If you
+have questions or comments, the `Discourse forums
+<https://discourse.llvm.org>`_ is a good place to ask
+them.
+
+Note that if you are reading this file from a Git checkout or the main
+LLVM web page, this document applies to the *next* release, not the current
+one.  To see the release notes for a specific release, please see the `releases
+page <https://llvm.org/releases/>`_.
+
+Non-comprehensive list of changes in this release
+=================================================
+.. NOTE
+   For small 1-3 sentence descriptions, just add an entry at the end of
+   this list. If your description won't fit comfortably in one bullet
+   point (e.g. maybe you would like to give an example of the
+   functionality, or simply have a lot to talk about), see the `NOTE` below
+   for adding a new subsection.
+
+* ...
+
+Update on required toolchains to build LLVM
+-------------------------------------------
+
+Changes to the LLVM IR
+----------------------
+
+* The ``x86_mmx`` IR type has been removed. It will be translated to
+  the standard vector type ``<1 x i64>`` in bitcode upgrade.
+* Renamed ``llvm.experimental.stepvector`` intrinsic to ``llvm.stepvector``.
+
+* Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``.
+
+* Added ``U<address space>`` to data layout to represent undesirable address space
+  if a target has it.
+
+Changes to LLVM infrastructure
+------------------------------
+
+Changes to building LLVM
+------------------------
+
+Changes to TableGen
+-------------------
+
+Changes to Interprocedural Optimizations
+----------------------------------------
+
+Changes to the AArch64 Backend
+------------------------------
+
+* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
+  the required alignment space with a sequence of `0x0` bytes (the requested
+  fill value) rather than NOPs.
+
+Changes to the AMDGPU Backend
+-----------------------------
+
+* Removed ``llvm.amdgcn.flat.atomic.fadd`` and
+  ``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the
+  :ref:`atomicrmw <i_atomicrmw>` instruction with `fadd` and
+  addrspace(0) or addrspace(1) instead.
+
+Changes to the ARM Backend
+--------------------------
+
+* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
+  the required alignment space with a sequence of `0x0` bytes (the requested
+  fill value) rather than NOPs.
+
+Changes to the AVR Backend
+--------------------------
+
+Changes to the DirectX Backend
+------------------------------
+
+Changes to the Hexagon Backend
+------------------------------
+
+Changes to the LoongArch Backend
+--------------------------------
+
+Changes to the MIPS Backend
+---------------------------
+
+Changes to the PowerPC Backend
+------------------------------
+
+Changes to the RISC-V Backend
+-----------------------------
+
+* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill
+  the required alignment space with a sequence of `0x0` bytes (the requested
+  fill value) rather than NOPs.
+* Added Syntacore SCR4 and SCR5 CPUs: ``-mcpu=syntacore-scr4/5-rv32/64``
+* ``-mcpu=sifive-p470`` was added.
+* Added Hazard3 CPU as taped out for RP2350: ``-mcpu=rp2350-hazard3`` (32-bit
+  only).
+* Fixed length vector support using RVV instructions now requires VLEN>=64. This
+  means Zve32x and Zve32f will also require Zvl64b. The prior support was
+  largely untested.
+* The ``Zvbc32e`` and ``Zvkgs`` extensions are now supported experimentally.
+* Added ``Smctr`` and ``Ssctr`` extensions.
+* ``-mcpu=syntacore-scr7`` was added.
+
+Changes to the WebAssembly Backend
+----------------------------------
+
+Changes to the Windows Target
+-----------------------------
+
+Changes to the X86 Backend
+--------------------------
+
+* `.balign N, 0x90`, `.p2align N, 0x90`, and `.align N, 0x90` in code sections
+  now fill the required alignment space with repeating `0x90` bytes, rather than
+  using optimised NOP filling. Optimised NOP filling fills the space with NOP
+  instructions of various widths, not just those that use the `0x90` byte
+  encoding. To use optimised NOP filling in a code section, leave off the
+  "fillval" argument, i.e. `.balign N`, `.p2align N` or `.align N` respectively.
+
+* Due to the removal of the ``x86_mmx`` IR type, functions with
+  ``x86_mmx`` arguments or return values will use a different,
+  incompatible, calling convention ABI. Such functions are not
+  generally seen in the wild (Clang never generates them!), so this is
+  not expected to result in real-world compatibility problems.
+
+* Support ISA of ``AVX10.2-256`` and ``AVX10.2-512``.
+
+Changes to the OCaml bindings
+-----------------------------
+
+Changes to the Python bindings
+------------------------------
+
+Changes to the C API
+--------------------
+
+* The following symbols are deleted due to the removal of the ``x86_mmx`` IR type:
+
+  * ``LLVMX86_MMXTypeKind``
+  * ``LLVMX86MMXTypeInContext``
+  * ``LLVMX86MMXType``
+
+ * The following functions are added to further support non-null-terminated strings:
+
+  * ``LLVMGetNamedFunctionWithLength``
+  * ``LLVMGetNamedGlobalWithLength``
+
+* The following functions are added to access the ``LLVMContextRef`` associated
+   with ``LLVMValueRef`` and ``LLVMBuilderRef`` objects:
+
+  * ``LLVMGetValueContext``
+  * ``LLVMGetBuilderContext``
+
+* The new pass manager can now be invoked with a custom alias analysis pipeline, using
+  the ``LLVMPassBuilderOptionsSetAAPipeline`` function.
+
+* It is now also possible to run the new pass manager on a single function, by calling
+  ``LLVMRunPassesOnFunction`` instead of ``LLVMRunPasses``.
+
+* Support for creating instructions with custom synchronization scopes has been added:
+
+  * ``LLVMGetSyncScopeID`` to map a synchronization scope name to an ID.
+  * ``LLVMBuildFenceSyncScope``, ``LLVMBuildAtomicRMWSyncScope`` and
+    ``LLVMBuildAtomicCmpXchgSyncScope`` versions of the existing builder functions
+    with an additional synchronization scope ID parameter.
+  * ``LLVMGetAtomicSyncScopeID`` and ``LLVMSetAtomicSyncScopeID`` to get and set the
+    synchronization scope of any atomic instruction.
+  * ``LLVMIsAtomic`` to check if an instruction is atomic, for use with the above functions.
+    Because of backwards compatibility, ``LLVMIsAtomicSingleThread`` and
+    ``LLVMSetAtomicSingleThread`` continue to work with any instruction type.
+
+* The `LLVMSetPersonalityFn` and `LLVMSetInitializer` APIs now support clearing the
+  personality function and initializer respectively by passing a null pointer.
+
+* The following functions are added to allow iterating over debug records attached to
+  instructions:
+
+  * ``LLVMGetFirstDbgRecord``
+  * ``LLVMGetLastDbgRecord``
+  * ``LLVMGetNextDbgRecord``
+  * ``LLVMGetPreviousDbgRecord``
+
+
+Changes to the CodeGen infrastructure
+-------------------------------------
+
+Changes to the Metadata Info
+---------------------------------
+
+Changes to the Debug Info
+---------------------------------
+
+Changes to the LLVM tools
+---------------------------------
+
+Changes to LLDB
+---------------------------------
+
+Changes to BOLT
+---------------------------------
+
+Changes to Sanitizers
+---------------------
+
+Other Changes
+-------------
+
+External Open Source Projects Using LLVM 19
+===========================================
+
+* A project...
+
+Additional Information
+======================
+
+A wide variety of additional information is available on the `LLVM web page
+<https://llvm.org/>`_, in particular in the `documentation
+<https://llvm.org/docs/>`_ section.  The web page also contains versions of the
+API documentation which is up-to-date with the Git version of the source
+code.  You can access versions of these documents specific to this release by
+going into the ``llvm/docs/`` directory in the LLVM tree.
+
+If you have any questions or comments about LLVM, please feel free to contact
+us via the `Discourse forums <https://discourse.llvm.org>`_.
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 8f7ab2f9df389e..eb9c84d70d3150 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -94,6 +94,7 @@ class DataLayout {
   unsigned AllocaAddrSpace = 0;
   unsigned ProgramAddrSpace = 0;
   unsigned DefaultGlobalsAddrSpace = 0;
+  std::optional<unsigned> UndesirableAddressSpace;
 
   MaybeAlign StackNaturalAlign;
   MaybeAlign FunctionPtrAlign;
@@ -245,6 +246,9 @@ class DataLayout {
   unsigned getDefaultGlobalsAddressSpace() const {
     return DefaultGlobalsAddrSpace;
   }
+  std::optional<unsigned> getUndesirableAddressSpace() const {
+    return UndesirableAddressSpace;
+  }
 
   bool hasMicrosoftFastStdCallMangling() const {
     return ManglingMode == MM_WinCOFFX86;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 477b77a6dd5335..a1584125016a7a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5574,6 +5574,10 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
     if (!DL.contains("-p9") && !DL.starts_with("p9"))
       Res.append("-p9:192:256:256:32");
 
+    // Add flat address space.
+    if (!DL.contains("-U0") && !DL.starts_with("U0"))
+      Res.append("-U0");
+
     return Res;
   }
 
@@ -5636,6 +5640,12 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
       Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
   }
 
+  if (T.isNVPTX()) {
+    // Add flat address space.
+    if (!DL.contains("-U0") && !DL.starts_with("U0"))
+      Res.append("-U0");
+  }
+
   return Res;
 }
 
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index d295d1f5785eb9..1fbd6a909870c4 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -228,6 +228,7 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
   AllocaAddrSpace = Other.AllocaAddrSpace;
   ProgramAddrSpace = Other.ProgramAddrSpace;
   DefaultGlobalsAddrSpace = Other.DefaultGlobalsAddrSpace;
+  UndesirableAddressSpace = Other.UndesirableAddressSpace;
   StackNaturalAlign = Other.StackNaturalAlign;
   FunctionPtrAlign = Other.FunctionPtrAlign;
   TheFunctionPtrAlignType = Other.TheFunctionPtrAlignType;
@@ -250,6 +251,7 @@ bool DataLayout::operator==(const DataLayout &Other) const {
          AllocaAddrSpace == Other.AllocaAddrSpace &&
          ProgramAddrSpace == Other.ProgramAddrSpace &&
          DefaultGlobalsAddrSpace == Other.DefaultGlobalsAddrSpace &&
+         UndesirableAddressSpace == Other.UndesirableAddressSpace &&
          StackNaturalAlign == Other.StackNaturalAlign &&
          FunctionPtrAlign == Other.FunctionPtrAlign &&
          TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
@@ -568,6 +570,15 @@ Error DataLayout::parseSpecification(StringRef Spec) {
       return Err;
     break;
   }
+  case 'U': { // Flat address space.
+    if (Rest.empty())
+      return createSpecFormatError("U<address space>");
+    unsigned AS = ~0U;
+    if (Error Err = parseAddrSpace(Rest, AS))
+      return Err;
+    UndesirableAddressSpace.emplace(AS);
+    break;
+  }
   case 'm':
     if (!Rest.consume_front(":") || Rest.empty())
       return createSpecFormatError("m:<mangling>");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 16e23879cd735c..7b45c56d5c0764 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -627,7 +627,7 @@ static StringRef computeDataLayout(const Triple &TT) {
          "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
          "v32:32-v48:64-v96:"
          "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
-         "G1-ni:7:8:9";
+         "G1-ni:7:8:9-U0";
 }
 
 LLVM_READNONE
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 7d04cf3dc51e67..52f6efb1a5e92b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -117,7 +117,7 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
   else if (UseShortPointers)
     Ret += "-p3:32:32-p4:32:32-p5:32:32";
 
-  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
+  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64-U0";
 
   return Ret;
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index e5384b2eb2c2c1..8f7f0ffb1f3597 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -55,13 +55,13 @@ static std::string computeDataLayout(const Triple &TT) {
   // mean anything.
   if (Arch == Triple::spirv32)
     return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
-           "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+           "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-U4";
   if (TT.getVendor() == Triple::VendorType::AMD &&
       TT.getOS() == Triple::OSType::AMDHSA)
     return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
-           "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0";
+           "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0-U4";
   return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
-         "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+         "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-U4";
 }
 
 static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 1cd4a47c75739b..d699b7a216fde3 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -36,12 +36,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
   // Check that AMDGPU targets add -G1 if it's not present.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1");
   // and that ANDGCN adds p7 and p8 as well.
-  EXPECT_EQ(
-      UpgradeDataLayoutString("e-p:64:64", "amdgcn"),
-      "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
-  EXPECT_EQ(
-      UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"),
-      "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
+  EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"),
+            "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+            "32-U0");
+  EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"),
+            "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+            "32-U0");
   // but that r600 does not.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1");
 
@@ -56,7 +56,7 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-"
       "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:"
       "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-"
-      "p9:192:256:256:32");
+      "p9:192:256:256:32-U0");
 
   // Check that RISCV64 upgrades -n64 to -n32:64.
   EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128",
@@ -106,23 +106,23 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
   // Check that AMDGPU targets don't add -G1 if there is already a -G flag.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2");
   EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2");
-  EXPECT_EQ(
-      UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"),
-      "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
-  EXPECT_EQ(
-      UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"),
-      "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
-  EXPECT_EQ(
-      UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"),
-      "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
+  EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"),
+            "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+            "32-U0");
+  EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"),
+            "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+            "32-U0");
+  EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"),
+            "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+            "32-U0");
 
   // Check that AMDGCN targets don't add already declared address space 7.
   EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"),
-            "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32");
+            "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-U0");
   EXPECT_EQ(UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"),
-            "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32");
+            "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32-U0");
   EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"),
-            "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32");
+            "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-U0");
 
   // Check that SPIR & SPIRV targets don't add -G1 if there is already a -G
   // flag.
@@ -146,7 +146,7 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) {
   // Check that AMDGPU targets add G1 if it's not present.
   EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1");
   EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"),
-            "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
+            "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-U0");
 
   // Check that SPIR & SPIRV targets add G1 if it's not present.
   EXPECT_EQ(UpgradeDataLayoutString("", "spir"), "G1");
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index e253037e0edcea..4183c4731815a4 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -81,7 +81,7 @@ static constexpr StringLiteral amdgcnDataLayout =
     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
     "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
     "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:"
-    "64-S32-A5-G1-ni:7:8:9";
+    "64-S32-A5-G1-ni:7:8:9-U0";
 
 namespace {
 struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {