[llvm] Enable .ptr .global .align attributes for kernel attributes for CUDA (PR #114874)
Lewis Crawford via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 08:13:58 PST 2024
https://github.com/LewisCrawford updated https://github.com/llvm/llvm-project/pull/114874
>From a0d23dbd374eca92ed7a7d9f8619d0f3af0df2e8 Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Fri, 26 Jan 2024 13:03:27 -0800
Subject: [PATCH 01/18] Enable .ptr .global .align attributes for kernel
attributes for CUDA
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 4 +++
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 34 +++++++++++++++++++
2 files changed, 38 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/kernel-param-align.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 7f4e1035e7a70a..abda23eda10a77 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1622,6 +1622,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
Align ParamAlign = I->getParamAlign().valueOrOne();
O << ".align " << ParamAlign.value() << " ";
+ } else if (I->getParamAlign().valueOrOne() != 1) {
+ O << ".ptr .global ";
+ Align ParamAlign = I->getParamAlign().value();
+ O << ".align " << ParamAlign.value() << " ";
}
O << TLI->getParamName(F, paramIndex);
continue;
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
new file mode 100644
index 00000000000000..eda45928ea3059
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_72 2>&1 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_72 | %ptxas-verify %}
+
+%struct.Large = type { [16 x double] }
+
+; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
+; CHECK: .param .u64 func_align_param_1,
+; CHECK: .param .u32 func_align_param_2
+define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, i32 %n) {
+entry:
+ %0 = addrspacecast ptr %out to ptr addrspace(1)
+ %1 = addrspacecast ptr %input to ptr addrspace(1)
+ %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5
+ %tmp2 = load i32, ptr addrspace(1) %getElem, align 8
+ store i32 %tmp2, ptr addrspace(1) %0, align 4
+ ret void
+}
+
+; CHECK: .param .u64 func_param_0,
+; CHECK: .param .u64 func_param_1,
+; CHECK: .param .u32 func_param_2
+define void @func(ptr nocapture readonly %input, ptr nocapture %out, i32 %n) {
+entry:
+ %0 = addrspacecast ptr %out to ptr addrspace(1)
+ %1 = addrspacecast ptr %input to ptr addrspace(1)
+ %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5
+ %tmp2 = load i32, ptr addrspace(1) %getElem, align 8
+ store i32 %tmp2, ptr addrspace(1) %0, align 4
+ ret void
+}
+
+!nvvm.annotations = !{!0, !1}
+!0 = !{ptr @func_align, !"kernel", i32 1}
+!1 = !{ptr @func, !"kernel", i32 1}
\ No newline at end of file
>From 7dbc3b7f7f1749321ace16981eefeb307fd84f1e Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Thu, 1 Feb 2024 19:44:36 -0800
Subject: [PATCH 02/18] Rearrange code, add comment
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index abda23eda10a77..f0315249875311 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1601,11 +1601,16 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isKernelFunc) {
if (PTy) {
// Special handling for pointer arguments to kernel
+ // CUDA kernels assume that pointers are in global address space
+ // See:
+ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
O << "\t.param .u" << PTySizeInBits << " ";
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
- NVPTX::CUDA) {
- int addrSpace = PTy->getAddressSpace();
+ int addrSpace = PTy->getAddressSpace();
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
+ assert(addrSpace == 0 && "Invalid address space");
+ O << ".ptr .global ";
+ } else {
switch (addrSpace) {
default:
O << ".ptr ";
@@ -1620,13 +1625,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ".ptr .global ";
break;
}
- Align ParamAlign = I->getParamAlign().valueOrOne();
- O << ".align " << ParamAlign.value() << " ";
- } else if (I->getParamAlign().valueOrOne() != 1) {
- O << ".ptr .global ";
- Align ParamAlign = I->getParamAlign().value();
- O << ".align " << ParamAlign.value() << " ";
}
+ Align ParamAlign = I->getParamAlign().valueOrOne();
+ O << ".align " << ParamAlign.value() << " ";
O << TLI->getParamName(F, paramIndex);
continue;
}
>From 1814093da97d74daba0ae439a3c42e0712ee442c Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Thu, 1 Feb 2024 19:50:49 -0800
Subject: [PATCH 03/18] Fixed clang formatting
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index f0315249875311..3f3acc0d91463b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1607,7 +1607,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u" << PTySizeInBits << " ";
int addrSpace = PTy->getAddressSpace();
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA) {
assert(addrSpace == 0 && "Invalid address space");
O << ".ptr .global ";
} else {
>From 608202f523c5617e8dae7238cd2e28e717589261 Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Mon, 11 Mar 2024 04:28:12 -0700
Subject: [PATCH 04/18] Update .global and .align
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 12 ++++++++++--
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 6 +++---
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3f3acc0d91463b..5630f2a858853b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1609,8 +1609,16 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
int addrSpace = PTy->getAddressSpace();
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
NVPTX::CUDA) {
+ // Special handling for pointer arguments to kernel
+ // CUDA kernels assume that pointers are in global address space
+ // See:
+ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
assert(addrSpace == 0 && "Invalid address space");
O << ".ptr .global ";
+ if (I->getParamAlign().valueOrOne() != 1) {
+ Align ParamAlign = I->getParamAlign().value();
+ O << ".align " << ParamAlign.value() << " ";
+ }
} else {
switch (addrSpace) {
default:
@@ -1626,9 +1634,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ".ptr .global ";
break;
}
+ Align ParamAlign = I->getParamAlign().valueOrOne();
+ O << ".align " << ParamAlign.value() << " ";
}
- Align ParamAlign = I->getParamAlign().valueOrOne();
- O << ".align " << ParamAlign.value() << " ";
O << TLI->getParamName(F, paramIndex);
continue;
}
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index eda45928ea3059..81446a78bfc73b 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -4,7 +4,7 @@
%struct.Large = type { [16 x double] }
; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
-; CHECK: .param .u64 func_align_param_1,
+; CHECK: .param .u64 .ptr .global func_align_param_1,
; CHECK: .param .u32 func_align_param_2
define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, i32 %n) {
entry:
@@ -16,8 +16,8 @@ entry:
ret void
}
-; CHECK: .param .u64 func_param_0,
-; CHECK: .param .u64 func_param_1,
+; CHECK: .param .ptr .global .u64 func_param_0,
+; CHECK: .param .ptr .global .u64 func_param_1,
; CHECK: .param .u32 func_param_2
define void @func(ptr nocapture readonly %input, ptr nocapture %out, i32 %n) {
entry:
>From 58dc372e40ce95f3cfc78c9472ca6e1da0ec9640 Mon Sep 17 00:00:00 2001
From: Vandana2896 <129426835+Vandana2896 at users.noreply.github.com>
Date: Tue, 20 Feb 2024 14:50:55 -0800
Subject: [PATCH 05/18] Update NVPTXAsmPrinter.cpp
Clang formatting reverted.
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 5630f2a858853b..28312cb9171b28 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1607,8 +1607,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u" << PTySizeInBits << " ";
int addrSpace = PTy->getAddressSpace();
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
- NVPTX::CUDA) {
+
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
// Special handling for pointer arguments to kernel
// CUDA kernels assume that pointers are in global address space
// See:
>From 3f9b0fb3dcb75cf36fd694800f899f042d2c5ff1 Mon Sep 17 00:00:00 2001
From: Vandana2896 <129426835+Vandana2896 at users.noreply.github.com>
Date: Tue, 20 Feb 2024 15:07:09 -0800
Subject: [PATCH 06/18] Update NVPTXAsmPrinter.cpp
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 28312cb9171b28..8b44b58a644de1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1607,12 +1607,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u" << PTySizeInBits << " ";
int addrSpace = PTy->getAddressSpace();
+<<<<<<< HEAD
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
// Special handling for pointer arguments to kernel
// CUDA kernels assume that pointers are in global address space
// See:
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
+=======
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA) {
+>>>>>>> 3d49f303bf57 (Update NVPTXAsmPrinter.cpp)
assert(addrSpace == 0 && "Invalid address space");
O << ".ptr .global ";
if (I->getParamAlign().valueOrOne() != 1) {
>From 7eb7dbb64fa688e5bc8f7d8d2708395575b717c2 Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Mon, 11 Mar 2024 04:32:04 -0700
Subject: [PATCH 07/18] Fix comment
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 8b44b58a644de1..e530f87e02255e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1600,10 +1600,6 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isKernelFunc) {
if (PTy) {
- // Special handling for pointer arguments to kernel
- // CUDA kernels assume that pointers are in global address space
- // See:
- // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
O << "\t.param .u" << PTySizeInBits << " ";
int addrSpace = PTy->getAddressSpace();
>From 955d72af8a34c01fe0920e1e60c4a34320aef2ff Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Thu, 25 Apr 2024 07:30:51 -0700
Subject: [PATCH 08/18] add addrspace
---
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 81446a78bfc73b..bc6d7844dc79e9 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -5,8 +5,8 @@
; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
; CHECK: .param .u64 .ptr .global func_align_param_1,
-; CHECK: .param .u32 func_align_param_2
-define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, i32 %n) {
+; CHECK: .param .u32 .ptr .global func_align_param_2
+define void @func_align(ptr nocapture readonly align 16 %input, ptr addrspace(3) nocapture %out, i32 %n) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
%1 = addrspacecast ptr %input to ptr addrspace(1)
>From 4973c303c4688436d2c12092693be27e42b299a8 Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Thu, 25 Apr 2024 10:58:15 -0700
Subject: [PATCH 09/18] Fix typo
---
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index bc6d7844dc79e9..cdf904792cdae1 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -5,7 +5,7 @@
; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
; CHECK: .param .u64 .ptr .global func_align_param_1,
-; CHECK: .param .u32 .ptr .global func_align_param_2
+; CHECK: .param .u32 func_align_param_2
define void @func_align(ptr nocapture readonly align 16 %input, ptr addrspace(3) nocapture %out, i32 %n) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
>From 22914f7616230626c39792ccc703a7adbdfb35e9 Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Thu, 25 Apr 2024 11:08:14 -0700
Subject: [PATCH 10/18] CHECK-LABEL
---
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index cdf904792cdae1..1bd4a0f9fc04fb 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -3,6 +3,7 @@
%struct.Large = type { [16 x double] }
+; CHECK-LABEL: func_align
; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
; CHECK: .param .u64 .ptr .global func_align_param_1,
; CHECK: .param .u32 func_align_param_2
@@ -16,6 +17,7 @@ entry:
ret void
}
+; CHECK-LABEL: func
; CHECK: .param .ptr .global .u64 func_param_0,
; CHECK: .param .ptr .global .u64 func_param_1,
; CHECK: .param .u32 func_param_2
>From 21da083fe77b49bf5c3d798bf058a9971054eafb Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Fri, 26 Apr 2024 18:40:08 -0700
Subject: [PATCH 11/18] Update testcase
---
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 1bd4a0f9fc04fb..b46972fc1044b3 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -6,8 +6,8 @@
; CHECK-LABEL: func_align
; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
; CHECK: .param .u64 .ptr .global func_align_param_1,
-; CHECK: .param .u32 func_align_param_2
-define void @func_align(ptr nocapture readonly align 16 %input, ptr addrspace(3) nocapture %out, i32 %n) {
+; CHECK: .param .u64 .ptr .global func_align_param_2
+define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, ptr addrspace(3) %n) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
%1 = addrspacecast ptr %input to ptr addrspace(1)
@@ -18,8 +18,8 @@ entry:
}
; CHECK-LABEL: func
-; CHECK: .param .ptr .global .u64 func_param_0,
-; CHECK: .param .ptr .global .u64 func_param_1,
+; CHECK: .param .u64 .ptr .global func_param_0,
+; CHECK: .param .u64 .ptr .global func_param_1,
; CHECK: .param .u32 func_param_2
define void @func(ptr nocapture readonly %input, ptr nocapture %out, i32 %n) {
entry:
>From 118c10c08b9e1bb0582b894e769bfbbd7e1601ff Mon Sep 17 00:00:00 2001
From: Vandana <vandanak at nvidia.com>
Date: Mon, 20 May 2024 10:32:51 -0700
Subject: [PATCH 12/18] update test
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 12 +++---------
llvm/test/CodeGen/NVPTX/i1-param.ll | 2 +-
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +-
3 files changed, 5 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index e530f87e02255e..dbd13540dacab2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
+///===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -1603,18 +1603,12 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u" << PTySizeInBits << " ";
int addrSpace = PTy->getAddressSpace();
-<<<<<<< HEAD
-
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA) {
// Special handling for pointer arguments to kernel
// CUDA kernels assume that pointers are in global address space
// See:
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
-=======
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
- NVPTX::CUDA) {
->>>>>>> 3d49f303bf57 (Update NVPTXAsmPrinter.cpp)
- assert(addrSpace == 0 && "Invalid address space");
O << ".ptr .global ";
if (I->getParamAlign().valueOrOne() != 1) {
Align ParamAlign = I->getParamAlign().value();
diff --git a/llvm/test/CodeGen/NVPTX/i1-param.ll b/llvm/test/CodeGen/NVPTX/i1-param.ll
index 375752b619a581..3673ee7c77a136 100644
--- a/llvm/test/CodeGen/NVPTX/i1-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-param.ll
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
; CHECK: .entry foo
; CHECK: .param .u8 foo_param_0
-; CHECK: .param .u64 foo_param_1
+; CHECK: .param .u64 .ptr .global foo_param_1
define void @foo(i1 %p, ptr %out) {
%val = zext i1 %p to i32
store i32 %val, ptr %out
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index b46972fc1044b3..3350b4fcda83e4 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -6,7 +6,7 @@
; CHECK-LABEL: func_align
; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
; CHECK: .param .u64 .ptr .global func_align_param_1,
-; CHECK: .param .u64 .ptr .global func_align_param_2
+; CHECK: .param .u32 .ptr .global func_align_param_2
define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, ptr addrspace(3) %n) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
>From c8c51a0094c2b94a0694609e1ceb98519cf1b71d Mon Sep 17 00:00:00 2001
From: Lewis Crawford <Lewis Crawford>
Date: Mon, 4 Nov 2024 09:34:00 +0000
Subject: [PATCH 13/18] Fix tests
---
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 3350b4fcda83e4..1ef0caf4599a68 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -3,10 +3,10 @@
%struct.Large = type { [16 x double] }
-; CHECK-LABEL: func_align
+; CHECK-LABEL: .entry func_align(
; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
; CHECK: .param .u64 .ptr .global func_align_param_1,
-; CHECK: .param .u32 .ptr .global func_align_param_2
+; CHECK: .param .u64 .ptr .global func_align_param_2
define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, ptr addrspace(3) %n) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
@@ -17,7 +17,7 @@ entry:
ret void
}
-; CHECK-LABEL: func
+; CHECK-LABEL: .entry func(
; CHECK: .param .u64 .ptr .global func_param_0,
; CHECK: .param .u64 .ptr .global func_param_1,
; CHECK: .param .u32 func_param_2
>From 504c119e7623f1e2c222031eae52d38880e453bd Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Mon, 4 Nov 2024 21:29:47 +0000
Subject: [PATCH 14/18] Handle ptr addrspaces in kernel args
Change the asmprinter so that it will not force .const or .shared
pointers to be changed to .global ones if they are explicitly
annotated with those address-spaces (even though they are not
expected to be present).
Unify the code-path for printing addrspace annotations for both
CUDA and CL, and only coerce generic pointers into .global pointers
on CUDA.
Emit alignment info for both CL and CUDA, but omit it on CUDA if
it is not explicitly supplied.
Update tests to have both aligned and unaligned pointers in all
relevant addrspaces.
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 47 +++++++++----------
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 36 +++++++++-----
2 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index dbd13540dacab2..99a379361d8262 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1603,35 +1603,34 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u" << PTySizeInBits << " ";
int addrSpace = PTy->getAddressSpace();
- if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
- NVPTX::CUDA) {
+ const bool IsCUDA =
+ static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA;
+
+ O << ".ptr ";
+ switch (addrSpace) {
+ default:
// Special handling for pointer arguments to kernel
// CUDA kernels assume that pointers are in global address space
// See:
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
- O << ".ptr .global ";
- if (I->getParamAlign().valueOrOne() != 1) {
- Align ParamAlign = I->getParamAlign().value();
- O << ".align " << ParamAlign.value() << " ";
- }
- } else {
- switch (addrSpace) {
- default:
- O << ".ptr ";
- break;
- case ADDRESS_SPACE_CONST:
- O << ".ptr .const ";
- break;
- case ADDRESS_SPACE_SHARED:
- O << ".ptr .shared ";
- break;
- case ADDRESS_SPACE_GLOBAL:
- O << ".ptr .global ";
- break;
- }
- Align ParamAlign = I->getParamAlign().valueOrOne();
- O << ".align " << ParamAlign.value() << " ";
+ if (IsCUDA)
+ O << " .global ";
+ break;
+ case ADDRESS_SPACE_CONST:
+ O << " .const ";
+ break;
+ case ADDRESS_SPACE_SHARED:
+ O << " .shared ";
+ break;
+ case ADDRESS_SPACE_GLOBAL:
+ O << " .global ";
+ break;
}
+
+ Align ParamAlign = I->getParamAlign().valueOrOne();
+ if (ParamAlign != 1 || !IsCUDA)
+ O << ".align " << ParamAlign.value() << " ";
O << TLI->getParamName(F, paramIndex);
continue;
}
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 1ef0caf4599a68..29c20f05ad03fa 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,13 +1,19 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_72 2>&1 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_72 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
%struct.Large = type { [16 x double] }
; CHECK-LABEL: .entry func_align(
-; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0,
-; CHECK: .param .u64 .ptr .global func_align_param_1,
-; CHECK: .param .u64 .ptr .global func_align_param_2
-define void @func_align(ptr nocapture readonly align 16 %input, ptr nocapture %out, ptr addrspace(3) %n) {
+; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0
+; CHECK: .param .u64 .ptr .global .align 16 func_align_param_1
+; CHECK: .param .u64 .ptr .global .align 16 func_align_param_2
+; CHECK: .param .u64 .ptr .shared .align 16 func_align_param_3
+; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4
+define void @func_align(ptr nocapture readonly align 16 %input,
+ ptr nocapture align 16 %out,
+ ptr addrspace(1) align 16 %global,
+ ptr addrspace(3) align 16 %shared,
+ ptr addrspace(4) align 16 %const) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
%1 = addrspacecast ptr %input to ptr addrspace(1)
@@ -17,11 +23,17 @@ entry:
ret void
}
-; CHECK-LABEL: .entry func(
-; CHECK: .param .u64 .ptr .global func_param_0,
-; CHECK: .param .u64 .ptr .global func_param_1,
-; CHECK: .param .u32 func_param_2
-define void @func(ptr nocapture readonly %input, ptr nocapture %out, i32 %n) {
+; CHECK-LABEL: .entry func_noalign(
+; CHECK: .param .u64 .ptr .global func_noalign_param_0
+; CHECK: .param .u64 .ptr .global func_noalign_param_1
+; CHECK: .param .u64 .ptr .global func_noalign_param_2
+; CHECK: .param .u64 .ptr .shared func_noalign_param_3
+; CHECK: .param .u64 .ptr .const func_noalign_param_4
+define void @func_noalign(ptr nocapture readonly %input,
+ ptr nocapture %out,
+ ptr addrspace(1) %global,
+ ptr addrspace(3) %shared,
+ ptr addrspace(4) %const) {
entry:
%0 = addrspacecast ptr %out to ptr addrspace(1)
%1 = addrspacecast ptr %input to ptr addrspace(1)
@@ -33,4 +45,4 @@ entry:
!nvvm.annotations = !{!0, !1}
!0 = !{ptr @func_align, !"kernel", i32 1}
-!1 = !{ptr @func, !"kernel", i32 1}
\ No newline at end of file
+!1 = !{ptr @func_noalign, !"kernel", i32 1}
>From aaade688a43ebd768835d090968f466d74a1dba4 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Mon, 4 Nov 2024 21:43:50 +0000
Subject: [PATCH 15/18] Fix clang-format issue in header comment
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 99a379361d8262..32eba0c287bf41 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1,4 +1,4 @@
-///===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
+//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From 78ea5e5be4b49cbb94fbc3a88d053de5db2f175f Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Wed, 6 Nov 2024 17:17:15 +0000
Subject: [PATCH 16/18] Improve kernel arg attrs accuracy
Allow explicitly defined kernel .ptr arg alignments of 1 to be
specified.
Avoid forcing .global annotations for generic pointers on CUDA.
Allow .local pointers, since the PTX specification says they are legal
here.
Avoid outputting unnecessary extra spaces in PTX between .ptr and the
memory space.
Improve test-cases by removing unnecessary function bodies, and adding
more varied alignments + address spaces.
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 40 +++++++++----------
llvm/test/CodeGen/NVPTX/i1-param.ll | 2 +-
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 40 ++++++++-----------
3 files changed, 36 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 32eba0c287bf41..9dacdf4cef0931 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1600,37 +1600,33 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isKernelFunc) {
if (PTy) {
- O << "\t.param .u" << PTySizeInBits << " ";
+ O << "\t.param .u" << PTySizeInBits << " .ptr ";
- int addrSpace = PTy->getAddressSpace();
- const bool IsCUDA =
- static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
- NVPTX::CUDA;
-
- O << ".ptr ";
- switch (addrSpace) {
+ switch (PTy->getAddressSpace()) {
default:
- // Special handling for pointer arguments to kernel
- // CUDA kernels assume that pointers are in global address space
- // See:
- // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
- if (IsCUDA)
- O << " .global ";
break;
- case ADDRESS_SPACE_CONST:
- O << " .const ";
+ case ADDRESS_SPACE_GLOBAL:
+ O << ".global ";
break;
case ADDRESS_SPACE_SHARED:
- O << " .shared ";
+ O << ".shared ";
break;
- case ADDRESS_SPACE_GLOBAL:
- O << " .global ";
+ case ADDRESS_SPACE_CONST:
+ O << ".const ";
+ break;
+ case ADDRESS_SPACE_LOCAL:
+ O << ".local ";
break;
}
- Align ParamAlign = I->getParamAlign().valueOrOne();
- if (ParamAlign != 1 || !IsCUDA)
- O << ".align " << ParamAlign.value() << " ";
+ const bool IsCUDA =
+ static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA;
+
+ MaybeAlign ParamAlign = I->getParamAlign();
+ if (ParamAlign.has_value() || !IsCUDA)
+ O << ".align " << ParamAlign.valueOrOne().value() << " ";
+
O << TLI->getParamName(F, paramIndex);
continue;
}
diff --git a/llvm/test/CodeGen/NVPTX/i1-param.ll b/llvm/test/CodeGen/NVPTX/i1-param.ll
index 3673ee7c77a136..b71cd9482900c0 100644
--- a/llvm/test/CodeGen/NVPTX/i1-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-param.ll
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
; CHECK: .entry foo
; CHECK: .param .u8 foo_param_0
-; CHECK: .param .u64 .ptr .global foo_param_1
+; CHECK: .param .u64 .ptr foo_param_1
define void @foo(i1 %p, ptr %out) {
%val = zext i1 %p to i32
store i32 %val, ptr %out
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 29c20f05ad03fa..4e4799b8677ab3 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -4,42 +4,36 @@
%struct.Large = type { [16 x double] }
; CHECK-LABEL: .entry func_align(
-; CHECK: .param .u64 .ptr .global .align 16 func_align_param_0
-; CHECK: .param .u64 .ptr .global .align 16 func_align_param_1
-; CHECK: .param .u64 .ptr .global .align 16 func_align_param_2
-; CHECK: .param .u64 .ptr .shared .align 16 func_align_param_3
+; CHECK: .param .u64 .ptr .align 1 func_align_param_0
+; CHECK: .param .u64 .ptr .align 2 func_align_param_1
+; CHECK: .param .u64 .ptr .global .align 4 func_align_param_2
+; CHECK: .param .u64 .ptr .shared .align 8 func_align_param_3
; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4
-define void @func_align(ptr nocapture readonly align 16 %input,
- ptr nocapture align 16 %out,
- ptr addrspace(1) align 16 %global,
- ptr addrspace(3) align 16 %shared,
- ptr addrspace(4) align 16 %const) {
+; CHECK: .param .u64 .ptr .local .align 32 func_align_param_5
+define void @func_align(ptr nocapture readonly align 1 %input,
+ ptr nocapture align 2 %out,
+ ptr addrspace(1) align 4 %global,
+ ptr addrspace(3) align 8 %shared,
+ ptr addrspace(4) align 16 %const,
+ ptr addrspace(5) align 32 %local) {
entry:
- %0 = addrspacecast ptr %out to ptr addrspace(1)
- %1 = addrspacecast ptr %input to ptr addrspace(1)
- %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5
- %tmp2 = load i32, ptr addrspace(1) %getElem, align 8
- store i32 %tmp2, ptr addrspace(1) %0, align 4
ret void
}
; CHECK-LABEL: .entry func_noalign(
-; CHECK: .param .u64 .ptr .global func_noalign_param_0
-; CHECK: .param .u64 .ptr .global func_noalign_param_1
+; CHECK: .param .u64 .ptr func_noalign_param_0
+; CHECK: .param .u64 .ptr func_noalign_param_1
; CHECK: .param .u64 .ptr .global func_noalign_param_2
; CHECK: .param .u64 .ptr .shared func_noalign_param_3
-; CHECK: .param .u64 .ptr .const func_noalign_param_4
+; CHECK: .param .u64 .ptr .const func_noalign_param_4
+; CHECK: .param .u64 .ptr .local func_noalign_param_5
define void @func_noalign(ptr nocapture readonly %input,
ptr nocapture %out,
ptr addrspace(1) %global,
ptr addrspace(3) %shared,
- ptr addrspace(4) %const) {
+ ptr addrspace(4) %const,
+ ptr addrspace(5) %local) {
entry:
- %0 = addrspacecast ptr %out to ptr addrspace(1)
- %1 = addrspacecast ptr %input to ptr addrspace(1)
- %getElem = getelementptr inbounds %struct.Large, ptr addrspace(1) %1, i64 0, i32 0, i64 5
- %tmp2 = load i32, ptr addrspace(1) %getElem, align 8
- store i32 %tmp2, ptr addrspace(1) %0, align 4
ret void
}
>From 0ef9acbe3bd6db6fa2433d42e230cc121434b8d2 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Wed, 6 Nov 2024 17:58:12 +0000
Subject: [PATCH 17/18] Call emitPTXAddressSpace instead of switch
Refactor to use emitPTXAddressSpace instead of duplicating
the code in a separate switch statement for kernel .ptr args.
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 20 +++++---------------
1 file changed, 5 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9dacdf4cef0931..41841e78617f71 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1602,21 +1602,11 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (PTy) {
O << "\t.param .u" << PTySizeInBits << " .ptr ";
- switch (PTy->getAddressSpace()) {
- default:
- break;
- case ADDRESS_SPACE_GLOBAL:
- O << ".global ";
- break;
- case ADDRESS_SPACE_SHARED:
- O << ".shared ";
- break;
- case ADDRESS_SPACE_CONST:
- O << ".const ";
- break;
- case ADDRESS_SPACE_LOCAL:
- O << ".local ";
- break;
+ const unsigned AddrSpace = PTy->getAddressSpace();
+ if (AddrSpace != ADDRESS_SPACE_GENERIC) {
+ O << ".";
+ emitPTXAddressSpace(AddrSpace, O);
+ O << " ";
}
const bool IsCUDA =
>From f6f64a1620ec147d7a6e75cc207f2b0fdf047c09 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 8 Nov 2024 16:11:26 +0000
Subject: [PATCH 18/18] Emit .align 1 by default if unspecified
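Explicitly emit .align 1 for kernel pointer parameters under both CL and
CUDA when no alignment is specified. In most cases the frontend should
already specify an alignment, so this should change little in practice.
This also replaces the emitPTXAddressSpace call with an explicit switch
again, emitting each qualifier with a leading space instead of a
trailing one.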
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 35 ++++++++++---------
llvm/test/CodeGen/NVPTX/i1-param.ll | 2 +-
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 12 +++----
3 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 41841e78617f71..d1a2aa5d0f84ec 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1600,24 +1600,27 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isKernelFunc) {
if (PTy) {
- O << "\t.param .u" << PTySizeInBits << " .ptr ";
-
- const unsigned AddrSpace = PTy->getAddressSpace();
- if (AddrSpace != ADDRESS_SPACE_GENERIC) {
- O << ".";
- emitPTXAddressSpace(AddrSpace, O);
- O << " ";
+ O << "\t.param .u" << PTySizeInBits << " .ptr";
+
+ switch (PTy->getAddressSpace()) {
+ default:
+ break;
+ case ADDRESS_SPACE_GLOBAL:
+ O << " .global";
+ break;
+ case ADDRESS_SPACE_SHARED:
+ O << " .shared";
+ break;
+ case ADDRESS_SPACE_CONST:
+ O << " .const";
+ break;
+ case ADDRESS_SPACE_LOCAL:
+ O << " .local";
+ break;
}
- const bool IsCUDA =
- static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
- NVPTX::CUDA;
-
- MaybeAlign ParamAlign = I->getParamAlign();
- if (ParamAlign.has_value() || !IsCUDA)
- O << ".align " << ParamAlign.valueOrOne().value() << " ";
-
- O << TLI->getParamName(F, paramIndex);
+ O << " .align " << I->getParamAlign().valueOrOne().value();
+ O << " " << TLI->getParamName(F, paramIndex);
continue;
}
diff --git a/llvm/test/CodeGen/NVPTX/i1-param.ll b/llvm/test/CodeGen/NVPTX/i1-param.ll
index b71cd9482900c0..0878eb4fcd47b3 100644
--- a/llvm/test/CodeGen/NVPTX/i1-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-param.ll
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
; CHECK: .entry foo
; CHECK: .param .u8 foo_param_0
-; CHECK: .param .u64 .ptr foo_param_1
+; CHECK: .param .u64 .ptr .align 1 foo_param_1
define void @foo(i1 %p, ptr %out) {
%val = zext i1 %p to i32
store i32 %val, ptr %out
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 4e4799b8677ab3..f29b5823aa5fe6 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -21,12 +21,12 @@ entry:
}
; CHECK-LABEL: .entry func_noalign(
-; CHECK: .param .u64 .ptr func_noalign_param_0
-; CHECK: .param .u64 .ptr func_noalign_param_1
-; CHECK: .param .u64 .ptr .global func_noalign_param_2
-; CHECK: .param .u64 .ptr .shared func_noalign_param_3
-; CHECK: .param .u64 .ptr .const func_noalign_param_4
-; CHECK: .param .u64 .ptr .local func_noalign_param_5
+; CHECK: .param .u64 .ptr .align 1 func_noalign_param_0
+; CHECK: .param .u64 .ptr .align 1 func_noalign_param_1
+; CHECK: .param .u64 .ptr .global .align 1 func_noalign_param_2
+; CHECK: .param .u64 .ptr .shared .align 1 func_noalign_param_3
+; CHECK: .param .u64 .ptr .const .align 1 func_noalign_param_4
+; CHECK: .param .u64 .ptr .local .align 1 func_noalign_param_5
define void @func_noalign(ptr nocapture readonly %input,
ptr nocapture %out,
ptr addrspace(1) %global,
More information about the llvm-commits
mailing list