[clang] [lld] [llvm] [mlir] [IR] Introduce `T<address space>` to `DataLayout` to represent flat address space if a target supports it (PR #108786)
Shilei Tian via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 18 09:41:40 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108786
>From 4d86d8dc50b526ddca3de21709fe78ddafa7280e Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 17 Sep 2024 21:47:45 -0400
Subject: [PATCH] [IR] Introduce `T<address space>` to `DataLayout` to
represent flat address space if a target supports it
---
clang/lib/Basic/Targets/AMDGPU.cpp | 2 +-
clang/lib/Basic/Targets/NVPTX.cpp | 8 ++--
clang/test/CodeGen/target-data.c | 8 ++--
clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl | 2 +-
lld/test/ELF/lto/amdgcn-oses.ll | 6 +--
lld/test/ELF/lto/amdgcn.ll | 2 +-
llvm/docs/LangRef.rst | 19 +++++++++
llvm/docs/ReleaseNotes.rst | 3 ++
llvm/include/llvm/IR/DataLayout.h | 5 +++
llvm/lib/IR/AutoUpgrade.cpp | 10 +++++
llvm/lib/IR/DataLayout.cpp | 9 +++++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 +-
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +-
llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 6 +--
.../Bitcode/DataLayoutUpgradeTest.cpp | 40 +++++++++----------
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 +-
16 files changed, 86 insertions(+), 40 deletions(-)
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 3b748d0249d57b..0ee56848a6cb98 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -36,7 +36,7 @@ static const char *const DataLayoutStringAMDGCN =
"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
"32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
- "-ni:7:8:9";
+ "-ni:7:8:9-T0";
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 43b653dc52ce0d..59344c3c71aee2 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -66,12 +66,12 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
HasFloat16 = true;
if (TargetPointerWidth == 32)
- resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+ resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0");
else if (Opts.NVPTXUseShortPointers)
- resetDataLayout(
- "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+ resetDataLayout("e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:"
+ "32-n16:32:64-T0");
else
- resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+ resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0");
// If possible, get a TargetInfo for our host triple, so we can match its
// types.
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 41cbd5a0219d5e..a3d1a8cb9ee234 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -160,11 +160,11 @@
// RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=NVPTX
-// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0"
// RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=NVPTX64
-// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0"
// RUN: %clang_cc1 -triple r600-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=R600
@@ -176,12 +176,12 @@
// RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \
// RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-T0"
// Test default -target-cpu
// RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \
// RUN: | FileCheck %s -check-prefix=R600SIDefault
-// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-T0"
// RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=AARCH64
diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
index bb52f87615214b..b4b246cc082e00 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
-// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-T0"
void foo(void) {}
diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index 7a74d0317f2b9e..8903b45565b41f 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -25,7 +25,7 @@
;--- amdhsa.ll
target triple = "amdgcn-amd-amdhsa"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0"
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
@@ -36,7 +36,7 @@ define void @_start() {
;--- amdpal.ll
target triple = "amdgcn-amd-amdpal"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0"
define amdgpu_cs void @_start() {
ret void
@@ -44,7 +44,7 @@ define amdgpu_cs void @_start() {
;--- mesa3d.ll
target triple = "amdgcn-amd-mesa3d"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0"
define void @_start() {
ret void
diff --git a/lld/test/ELF/lto/amdgcn.ll b/lld/test/ELF/lto/amdgcn.ll
index 4281e209fd9789..bcfbc272afdf9a 100644
--- a/lld/test/ELF/lto/amdgcn.ll
+++ b/lld/test/ELF/lto/amdgcn.ll
@@ -5,7 +5,7 @@
; Make sure the amdgcn triple is handled
target triple = "amdgcn-amd-amdhsa"
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-T0"
define void @_start() {
ret void
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index abeafb7616201a..912e7c31be9176 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3050,6 +3050,25 @@ as follows:
address space 0, this property only affects the default value to be used
when creating globals without additional contextual information (e.g. in
LLVM passes).
+``T<address space>``
+ Specifies the 'flat' address space for a target. Some targets may have an
+ address space that allows access to multiple memory segments, each with
+ distinct address spaces. Accessing memory through a pointer in this address
+ space is valid but may be slower than using a pointer with a more specific
+ address space. When a pointer is converted to this address space,
+ optimizations can attempt to replace the access with one using the original
+ address space. This is the 'flat' address space. It is intended for targets
+ with varying pointer representations, which can be converted using the
+ addrspacecast instruction. It is important to note that the flat address
+ space is not always equivalent to address space 0, which LLVM commonly
+ refers to as the generic address space. For instance, in SPIR and SPIR-V,
+ address space 4 is designated as the flat address space. Additionally, the
+ flat address space must correspond to an existing address space of the
+ target. Specifying an address space as 'flat' address space does not change
+ any inherent characteristics of the original address spaces, nor does it
+ guarantee any additional properties. If this specification is absent, it
+ indicates that the target does not support a flat address space for
+ optimization.
.. _alloca_addrspace:
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 6df4c37b092432..4286902607668c 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -56,6 +56,9 @@ Changes to the LLVM IR
* Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``.
+* Added ``T<address space>`` to data layout to represent flat address space if a
+ target has it.
+
Changes to LLVM infrastructure
------------------------------
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 8f7ab2f9df389e..50279471e71017 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -88,12 +88,16 @@ class DataLayout {
/// The function pointer alignment is a multiple of the function alignment.
MultipleOfFunctionAlign,
};
+
+ enum { AS_INVALID = ~0U };
+
private:
bool BigEndian = false;
unsigned AllocaAddrSpace = 0;
unsigned ProgramAddrSpace = 0;
unsigned DefaultGlobalsAddrSpace = 0;
+ unsigned FlatAddressSpace = AS_INVALID;
MaybeAlign StackNaturalAlign;
MaybeAlign FunctionPtrAlign;
@@ -245,6 +249,7 @@ class DataLayout {
unsigned getDefaultGlobalsAddressSpace() const {
return DefaultGlobalsAddrSpace;
}
+ unsigned getFlatAddressSpace() const { return FlatAddressSpace; }
bool hasMicrosoftFastStdCallMangling() const {
return ManglingMode == MM_WinCOFFX86;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 69dae5e32dbbe8..a08700d4585985 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5451,6 +5451,10 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
if (!DL.contains("-p9") && !DL.starts_with("p9"))
Res.append("-p9:192:256:256:32");
+ // Add flat address space.
+ if (!DL.contains("-T0") && !DL.starts_with("T0"))
+ Res.append("-T0");
+
return Res;
}
@@ -5501,6 +5505,12 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
}
+ if (T.isNVPTX()) {
+ // Add flat address space.
+ if (!DL.contains("-T0") && !DL.starts_with("T0"))
+ Res.append("-T0");
+ }
+
return Res;
}
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index d295d1f5785eb9..51396241515647 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -228,6 +228,7 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
AllocaAddrSpace = Other.AllocaAddrSpace;
ProgramAddrSpace = Other.ProgramAddrSpace;
DefaultGlobalsAddrSpace = Other.DefaultGlobalsAddrSpace;
+ FlatAddressSpace = Other.FlatAddressSpace;
StackNaturalAlign = Other.StackNaturalAlign;
FunctionPtrAlign = Other.FunctionPtrAlign;
TheFunctionPtrAlignType = Other.TheFunctionPtrAlignType;
@@ -250,6 +251,7 @@ bool DataLayout::operator==(const DataLayout &Other) const {
AllocaAddrSpace == Other.AllocaAddrSpace &&
ProgramAddrSpace == Other.ProgramAddrSpace &&
DefaultGlobalsAddrSpace == Other.DefaultGlobalsAddrSpace &&
+ FlatAddressSpace == Other.FlatAddressSpace &&
StackNaturalAlign == Other.StackNaturalAlign &&
FunctionPtrAlign == Other.FunctionPtrAlign &&
TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
@@ -568,6 +570,13 @@ Error DataLayout::parseSpecification(StringRef Spec) {
return Err;
break;
}
+ case 'T': { // Flat address space.
+ if (Rest.empty())
+ return createSpecFormatError("T<address space>");
+ if (Error Err = parseAddrSpace(Rest, FlatAddressSpace))
+ return Err;
+ break;
+ }
case 'm':
if (!Rest.consume_front(":") || Rest.empty())
return createSpecFormatError("m:<mangling>");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f860b139945122..2e26cb969206f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -579,7 +579,7 @@ static StringRef computeDataLayout(const Triple &TT) {
"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
"v32:32-v48:64-v96:"
"128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
- "G1-ni:7:8:9";
+ "G1-ni:7:8:9-T0";
}
LLVM_READNONE
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 57b7fa783c14a7..eb7f29655e6441 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -117,7 +117,7 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
else if (UseShortPointers)
Ret += "-p3:32:32-p4:32:32-p5:32:32";
- Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
+ Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64-T0";
return Ret;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 7058b15d53aa0b..88808b3eb8b096 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -55,13 +55,13 @@ static std::string computeDataLayout(const Triple &TT) {
// mean anything.
if (Arch == Triple::spirv32)
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-T4";
if (TT.getVendor() == Triple::VendorType::AMD &&
TT.getOS() == Triple::OSType::AMDHSA)
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0";
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0-T4";
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-T4";
}
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index ca50187e5e5ee0..85029297a72df2 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -36,12 +36,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
// Check that AMDGPU targets add -G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1");
// and that ANDGCN adds p7 and p8 as well.
- EXPECT_EQ(
- UpgradeDataLayoutString("e-p:64:64", "amdgcn"),
- "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
- EXPECT_EQ(
- UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"),
- "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
+ EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"),
+ "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+ "32-T0");
+ EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"),
+ "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+ "32-T0");
// but that r600 does not.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1");
@@ -56,7 +56,7 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-"
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:"
"1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-"
- "p9:192:256:256:32");
+ "p9:192:256:256:32-T0");
// Check that RISCV64 upgrades -n64 to -n32:64.
EXPECT_EQ(UpgradeDataLayoutString("e-m:e-p:64:64-i64:64-i128:128-n64-S128",
@@ -99,23 +99,23 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
// Check that AMDGPU targets don't add -G1 if there is already a -G flag.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2");
EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2");
- EXPECT_EQ(
- UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"),
- "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
- EXPECT_EQ(
- UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"),
- "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
- EXPECT_EQ(
- UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"),
- "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
+ EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"),
+ "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+ "32-T0");
+ EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"),
+ "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+ "32-T0");
+ EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"),
+ "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:"
+ "32-T0");
// Check that AMDGCN targets don't add already declared address space 7.
EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"),
- "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32");
+ "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-T0");
EXPECT_EQ(UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"),
- "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32");
+ "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32-T0");
EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"),
- "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32");
+ "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32-T0");
// Check that SPIR & SPIRV targets don't add -G1 if there is already a -G
// flag.
@@ -139,7 +139,7 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) {
// Check that AMDGPU targets add G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1");
EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"),
- "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32");
+ "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-T0");
// Check that SPIR & SPIRV targets add G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("", "spir"), "G1");
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index fc3e1fc4f9d0c9..e6e3d180e1d29f 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -81,7 +81,7 @@ static constexpr StringLiteral amdgcnDataLayout =
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
"32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:"
- "64-S32-A5-G1-ni:7:8:9";
+ "64-S32-A5-G1-ni:7:8:9-T0";
namespace {
struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
More information about the cfe-commits
mailing list