[llvm] [NVPTX] Fix `ptxas` failures (NFC) (PR #125147)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 18:40:26 PST 2025
https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/125147
>From 85571ba7b46f5dbd3bebcd3f99b96c11ebe461dc Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 15:14:18 -0800
Subject: [PATCH 1/5] griddepcontrol requires sm_90
---
llvm/test/CodeGen/NVPTX/griddepcontrol.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/NVPTX/griddepcontrol.ll b/llvm/test/CodeGen/NVPTX/griddepcontrol.ll
index fe15b3fe4afbd93..0bf9196aa2902fe 100644
--- a/llvm/test/CodeGen/NVPTX/griddepcontrol.ll
+++ b/llvm/test/CodeGen/NVPTX/griddepcontrol.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_90 -march=nvptx64 | FileCheck %s
-; RUN: %if ptxas-11.8 %{ llc < %s -mcpu=sm_90 -march=nvptx64 | %ptxas-verify %}
+; RUN: %if ptxas-11.8 %{ llc < %s -mcpu=sm_90 -march=nvptx64 | %ptxas-verify -arch=sm_90 %}
define void @griddepcontrol() {
; CHECK-LABEL: griddepcontrol(
>From 492bb835a4d96f8b77f4d080ad9f9adcaf8a0f15 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 15:17:24 -0800
Subject: [PATCH 2/5] constant pointers not supported in Separate compilation
---
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 2889d2d89a85797..f324998d9b33dcc 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -o %t %}
%struct.Large = type { [16 x double] }
>From 2ef92969885fd5be86f69f3bcef0ff0abd562569 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 15:43:19 -0800
Subject: [PATCH 3/5] constant pointers not supported in Separate compilation -
lower-args.ll
---
llvm/test/CodeGen/NVPTX/lower-args.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 269bba75dc5fb33..0deff5afd931043 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -2,7 +2,7 @@
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas -o %t %}
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
>From 77f3afafb8d9b6085ee79b2fc81b30111256efd4 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 16:27:03 -0800
Subject: [PATCH 4/5] constant pointers not supported in Separate compilation
---
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +-
llvm/test/CodeGen/NVPTX/lower-args.ll | 10 ++++++----
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index f324998d9b33dcc..a56b85de8014303 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -o %t %}
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
%struct.Large = type { [16 x double] }
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 0deff5afd931043..925497748348c5f 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,8 +1,8 @@
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,IR,IRC
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
-; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
-; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas -o %t %}
+; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
+; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas -arch=sm_52 - %}
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
@@ -145,4 +145,6 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
; Function Attrs: convergent nounwind
-declare dso_local ptr @escape(ptr) local_unnamed_addr
+define dso_local ptr @escape(ptr) local_unnamed_addr {
+ ret ptr %0
+}
>From ce4cf633a565d63e3a2a3c79b7a7077fb0ee1f72 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Fri, 31 Jan 2025 18:39:38 -0800
Subject: [PATCH 5/5] Use non-const addrspace
---
llvm/test/CodeGen/NVPTX/lower-args.ll | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 925497748348c5f..2c8496817e9d29b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -2,7 +2,7 @@
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas -arch=sm_52 - %}
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas-verify %}
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
@@ -87,12 +87,12 @@ define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
}
; COMMON-LABEL: ptr_nongeneric
-define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) {
+define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
; IR-NOT: addrspacecast
; PTX-NOT: cvta.to.global
-; PTX: ld.const.u32
+; PTX: ld.shared.u32
; PTX st.global.u32
- %v = load i32, ptr addrspace(4) %in, align 4
+ %v = load i32, ptr addrspace(3) %in, align 4
store i32 %v, ptr addrspace(1) %out, align 4
ret void
}
@@ -145,6 +145,4 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
; Function Attrs: convergent nounwind
-define dso_local ptr @escape(ptr) local_unnamed_addr {
- ret ptr %0
-}
+declare dso_local ptr @escape(ptr) local_unnamed_addr
\ No newline at end of file
More information about the llvm-commits
mailing list