[llvm] [NVPTX] Fix `ptxas` failures (NFC) (PR #125147)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 31 18:41:56 PST 2025


https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/125147

>From 85571ba7b46f5dbd3bebcd3f99b96c11ebe461dc Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 15:14:18 -0800
Subject: [PATCH 1/6] griddepcontrol requires sm_90

---
 llvm/test/CodeGen/NVPTX/griddepcontrol.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/NVPTX/griddepcontrol.ll b/llvm/test/CodeGen/NVPTX/griddepcontrol.ll
index fe15b3fe4afbd9..0bf9196aa2902f 100644
--- a/llvm/test/CodeGen/NVPTX/griddepcontrol.ll
+++ b/llvm/test/CodeGen/NVPTX/griddepcontrol.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mcpu=sm_90 -march=nvptx64 | FileCheck %s
-; RUN: %if ptxas-11.8 %{ llc < %s -mcpu=sm_90 -march=nvptx64 | %ptxas-verify %}
+; RUN: %if ptxas-11.8 %{ llc < %s -mcpu=sm_90 -march=nvptx64 | %ptxas-verify -arch=sm_90 %}
 
 define void @griddepcontrol() {
 ; CHECK-LABEL: griddepcontrol(

>From 492bb835a4d96f8b77f4d080ad9f9adcaf8a0f15 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 15:17:24 -0800
Subject: [PATCH 2/6] constant pointers not supported in Separate compilation

---
 llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index 2889d2d89a8579..f324998d9b33dc 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -o %t %}
 
 %struct.Large = type { [16 x double] }
 

>From 2ef92969885fd5be86f69f3bcef0ff0abd562569 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 15:43:19 -0800
Subject: [PATCH 3/6] constant pointers not supported in Separate compilation -
 lower-args.ll

---
 llvm/test/CodeGen/NVPTX/lower-args.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 269bba75dc5fb3..0deff5afd93104 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
 ; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
 ; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas -o %t %}
 
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"

>From 77f3afafb8d9b6085ee79b2fc81b30111256efd4 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 30 Jan 2025 16:27:03 -0800
Subject: [PATCH 4/6] constant pointers not supported in Separate compilation

---
 llvm/test/CodeGen/NVPTX/kernel-param-align.ll |  2 +-
 llvm/test/CodeGen/NVPTX/lower-args.ll         | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index f324998d9b33dc..a56b85de801430 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -o %t %}
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
 
 %struct.Large = type { [16 x double] }
 
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 0deff5afd93104..925497748348c5 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,IR,IRC
 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
-; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
-; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas -o %t %}
+; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
+; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas -arch=sm_52 - %}
 
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
@@ -145,4 +145,6 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
 
 
 ; Function Attrs: convergent nounwind
-declare dso_local ptr @escape(ptr) local_unnamed_addr
+define dso_local ptr @escape(ptr) local_unnamed_addr {
+  ret ptr %0
+}

>From ce4cf633a565d63e3a2a3c79b7a7077fb0ee1f72 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Fri, 31 Jan 2025 18:39:38 -0800
Subject: [PATCH 5/6] Use non-const addrspace

---
 llvm/test/CodeGen/NVPTX/lower-args.ll | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 925497748348c5..2c8496817e9d29 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes COMMON,IR,IRO
 ; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes COMMON,PTX,PTXC
 ; RUN: llc < %s -mcpu=sm_52 --mtriple nvptx64-nvidia-nvcl| FileCheck %s --check-prefixes COMMON,PTX,PTXO
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas -arch=sm_52 - %}
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_52 | %ptxas-verify %}
 
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
@@ -87,12 +87,12 @@ define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
 }
 
 ; COMMON-LABEL: ptr_nongeneric
-define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) {
+define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(3) %in) {
 ; IR-NOT: addrspacecast
 ; PTX-NOT: cvta.to.global
-; PTX:  ld.const.u32
+; PTX:  ld.shared.u32
 ; PTX   st.global.u32
-  %v = load i32, ptr addrspace(4) %in, align 4
+  %v = load i32, ptr addrspace(3) %in, align 4
   store i32 %v, ptr addrspace(1) %out, align 4
   ret void
 }
@@ -145,6 +145,4 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
 
 
 ; Function Attrs: convergent nounwind
-define dso_local ptr @escape(ptr) local_unnamed_addr {
-  ret ptr %0
-}
+declare dso_local ptr @escape(ptr) local_unnamed_addr
\ No newline at end of file

>From f936cb6ccb97a56fc29a5ffa52e9251d6b7294f5 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Fri, 31 Jan 2025 18:40:46 -0800
Subject: [PATCH 6/6] Add EOF newline

---
 llvm/test/CodeGen/NVPTX/lower-args.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 2c8496817e9d29..cb3799d5e9e67e 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -145,4 +145,4 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
 
 
 ; Function Attrs: convergent nounwind
-declare dso_local ptr @escape(ptr) local_unnamed_addr
\ No newline at end of file
+declare dso_local ptr @escape(ptr) local_unnamed_addr



More information about the llvm-commits mailing list