[llvm] [NVPTX] Add errors for incorrect CUDA addrspaces (PR #138706)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 6 08:19:36 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Lewis Crawford (LewisCrawford)
<details>
<summary>Changes</summary>
The CUDA API only accepts kernel params in the global and generic address spaces, so display an error message when attempting to emit pointers outside those address-spaces from CUDA (but still allow them for OpenCL).
---
Full diff: https://github.com/llvm/llvm-project/pull/138706.diff
5 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp (+8)
- (modified) llvm/test/CodeGen/NVPTX/kernel-param-align.ll (+3-2)
- (added) llvm/test/CodeGen/NVPTX/lower-args-cuda.ll (+13)
- (added) llvm/test/CodeGen/NVPTX/lower-args-nvcl.ll (+17)
- (modified) llvm/test/CodeGen/NVPTX/lower-args.ll (-23)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 2f4b109e8e9e9..9e17adb6ac1ae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1399,6 +1399,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (PTy) {
O << "\t.param .u" << PTySizeInBits << " .ptr";
+ bool IsCUDA = static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA;
switch (PTy->getAddressSpace()) {
default:
break;
@@ -1406,12 +1408,18 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << " .global";
break;
case ADDRESS_SPACE_SHARED:
+ if (IsCUDA)
+ report_fatal_error(".shared ptr kernel args unsupported in CUDA.");
O << " .shared";
break;
case ADDRESS_SPACE_CONST:
+ if (IsCUDA)
+ report_fatal_error(".const ptr kernel args unsupported in CUDA.");
O << " .const";
break;
case ADDRESS_SPACE_LOCAL:
+ if (IsCUDA)
+ report_fatal_error(".local ptr kernel args unsupported in CUDA.");
O << " .local";
break;
}
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index a56b85de80143..e85ccf34bb6ac 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
+; RUN: llc < %s -mcpu=sm_60 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
+target triple = "nvptx64-nvidia-nvcl"
%struct.Large = type { [16 x double] }
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-cuda.ll b/llvm/test/CodeGen/NVPTX/lower-args-cuda.ll
new file mode 100644
index 0000000000000..7361ab28badb9
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/lower-args-cuda.ll
@@ -0,0 +1,13 @@
+; RUN: not --crash llc < %s -mcpu=sm_75 -o /dev/null 2>&1 | FileCheck %s
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Make sure we exit with an error message for this input, as pointers to the
+; shared address-space are only supported as kernel args in NVCL, not CUDA.
+; CHECK: .shared ptr kernel args unsupported in CUDA.
+define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+ %v = load i32, ptr addrspace(3) %in, align 4
+ store i32 %v, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-nvcl.ll b/llvm/test/CodeGen/NVPTX/lower-args-nvcl.ll
new file mode 100644
index 0000000000000..44b44e0c17626
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/lower-args-nvcl.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -S -nvptx-lower-args | FileCheck %s --check-prefixes COMMON,IR
+; RUN: llc < %s -mcpu=sm_20 | FileCheck %s --check-prefixes COMMON,PTX
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %}
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-nvcl"
+
+; COMMON-LABEL: ptr_nongeneric
+define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; IR-NOT: addrspacecast
+; PTX-NOT: cvta.to.global
+; PTX: ld.shared.u32
+; PTX: st.global.u32
+ %v = load i32, ptr addrspace(3) %in, align 4
+ store i32 %v, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 8e879871e295b..44445a17d1eb3 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -140,29 +140,6 @@ define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
ret void
}
-define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
-; IR-LABEL: define ptx_kernel void @ptr_nongeneric(
-; IR-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(3) [[IN:%.*]]) {
-; IR-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) [[IN]], align 4
-; IR-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4
-; IR-NEXT: ret void
-;
-; PTX-LABEL: ptr_nongeneric(
-; PTX: {
-; PTX-NEXT: .reg .b32 %r<2>;
-; PTX-NEXT: .reg .b64 %rd<3>;
-; PTX-EMPTY:
-; PTX-NEXT: // %bb.0:
-; PTX-NEXT: ld.param.u64 %rd1, [ptr_nongeneric_param_0];
-; PTX-NEXT: ld.param.u64 %rd2, [ptr_nongeneric_param_1];
-; PTX-NEXT: ld.shared.u32 %r1, [%rd2];
-; PTX-NEXT: st.global.u32 [%rd1], %r1;
-; PTX-NEXT: ret;
- %v = load i32, ptr addrspace(3) %in, align 4
- store i32 %v, ptr addrspace(1) %out, align 4
- ret void
-}
-
define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
; IRC-LABEL: define ptx_kernel void @ptr_as_int(
; IRC-SAME: i64 noundef [[I:%.*]], i32 noundef [[V:%.*]]) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/138706
More information about the llvm-commits
mailing list