[llvm] [lit] Refactor `ptxas` usage (PR #154439)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 14:47:30 PDT 2025
https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/154439
>From b2e784e0ed0f7d5ca24596eb2345a8992bc1313d Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Mon, 18 Aug 2025 23:31:55 +0000
Subject: [PATCH 1/7] Initial commit
---
llvm/test/lit.cfg.py | 29 +++++++++++++++++++++++++----
1 file changed, 25 insertions(+), 4 deletions(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 8c2d1a454e8f9..74a790bf370f3 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -306,9 +306,7 @@ def ptxas_version(ptxas):
return None
-# Enable %ptxas and %ptxas-verify tools.
-# %ptxas-verify defaults to sm_60 architecture. It can be overriden
-# by specifying required one, for instance: %ptxas-verify -arch=sm_80.
+# Enable %ptxas, %ptxas-verify, and %ptxas-legacy-verify tools.
def enable_ptxas(ptxas_executable):
version = ptxas_version(ptxas_executable)
if version:
@@ -339,6 +337,9 @@ def enable_ptxas(ptxas_executable):
(12, 5),
(12, 6),
(12, 8),
+ (12, 9),
+ (13, 0),
+ (13, 1),
]
def version_int(ver):
@@ -360,11 +361,31 @@ def version_int(ver):
major, minor = known_version
config.available_features.add("ptxas-{}.{}".format(major, minor))
+ # By default, compile the PTX for the latest supported architecture.
+ # You can override the default by specifying a different arch
+ # e.g. %ptxas-verify -arch=sm_75.
+ if version_int(version) >= version_int((13, 0)):
+ ptxas_default_arch = 100
+ elif version_int(version) >= version_int((12, 0)):
+ ptxas_default_arch = 90
+ elif version_int(version) >= version_int((11, 0)):
+ ptxas_default_arch = 80
+ else:
+ ptxas_default_arch = 70
+
+ if version_int(version) >= version_int((13, 0)):
+ ptxas_verify_tool = "%ptxas-verify"
+ else:
+ ptxas_verify_tool = "%ptxas-legacy-verify"
+
config.available_features.add("ptxas")
tools.extend(
[
ToolSubst("%ptxas", ptxas_executable),
- ToolSubst("%ptxas-verify", "{} -arch=sm_60 -c -".format(ptxas_executable)),
+ ToolSubst(
+ ptxas_verify_tool,
+ "{} -arch=sm_{} -c -".format(ptxas_executable, ptxas_default_arch),
+ ),
]
)
>From 2554a3e969dfb3d601bc97ae156ee479c991d589 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Mon, 18 Aug 2025 23:42:45 +0000
Subject: [PATCH 2/7] Removing CUDA 13.1
---
llvm/test/lit.cfg.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 74a790bf370f3..97690aef809ad 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -339,7 +339,6 @@ def enable_ptxas(ptxas_executable):
(12, 8),
(12, 9),
(13, 0),
- (13, 1),
]
def version_int(ver):
>From 3367dec999de47b7d9dd727865569cd9f4b8ac98 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 19 Aug 2025 21:03:14 +0000
Subject: [PATCH 3/7] Save work
---
llvm/test/lit.cfg.py | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 97690aef809ad..c9d6ca443eca5 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -339,6 +339,7 @@ def enable_ptxas(ptxas_executable):
(12, 8),
(12, 9),
(13, 0),
+ (13, 1),
]
def version_int(ver):
@@ -358,26 +359,31 @@ def version_int(ver):
for known_version in ptxas_known_versions:
if version_int(known_version) <= version_int(version):
major, minor = known_version
- config.available_features.add("ptxas-{}.{}".format(major, minor))
+ if version_int(version) >= version_int((13, 0)):
+ config.available_features.add("ptxas-{}.{}".format(major, minor))
+ else:
+ config.available_features.add("ptxas-legacy-{}.{}".format(major, minor))
+ break
# By default, compile the PTX for the latest supported architecture.
# You can override the default by specifying a different arch
# e.g. %ptxas-verify -arch=sm_75.
- if version_int(version) >= version_int((13, 0)):
- ptxas_default_arch = 100
- elif version_int(version) >= version_int((12, 0)):
- ptxas_default_arch = 90
- elif version_int(version) >= version_int((11, 0)):
- ptxas_default_arch = 80
+ # if version_int(version) >= version_int((13, 0)):
+ # ptxas_default_arch = 100
+ # elif version_int(version) >= version_int((12, 0)):
+ # ptxas_default_arch = 90
+ if version_int(version) >= version_int((10, 0)):
+ ptxas_default_arch = 75
else:
- ptxas_default_arch = 70
+ ptxas_default_arch = 60
if version_int(version) >= version_int((13, 0)):
ptxas_verify_tool = "%ptxas-verify"
+ config.available_features.add("ptxas")
else:
ptxas_verify_tool = "%ptxas-legacy-verify"
+ config.available_features.add("ptxas-legacy")
- config.available_features.add("ptxas")
tools.extend(
[
ToolSubst("%ptxas", ptxas_executable),
>From 2948cdd4fc02cd66d82a60f79bcaacb1bc423348 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 19 Aug 2025 23:28:16 +0000
Subject: [PATCH 4/7] Refactor around extracting info from ptxas
---
llvm/test/lit.cfg.py | 132 +++++++++++++------------------------------
1 file changed, 38 insertions(+), 94 deletions(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index c9d6ca443eca5..540e7b945e713 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -294,105 +294,49 @@ def get_asan_rtlib():
)
-# Find (major, minor) version of ptxas
def ptxas_version(ptxas):
- ptxas_cmd = subprocess.Popen([ptxas, "--version"], stdout=subprocess.PIPE)
- ptxas_out = ptxas_cmd.stdout.read().decode("ascii")
- ptxas_cmd.wait()
- match = re.search(r"release (\d+)\.(\d+)", ptxas_out)
- if match:
- return (int(match.group(1)), int(match.group(2)))
- print("couldn't determine ptxas version")
- return None
+ output = subprocess.check_output([ptxas, "--version"], text=True)
+ match = re.search(r"release (\d+)\.(\d+)", output)
+ if not match:
+ raise RuntimeError("Couldn't determine ptxas version")
+ return int(match.group(1)), int(match.group(2))
+
+
+def ptxas_supported_isa_versions(ptxas):
+ """Return list of (major, minor) tuples for supported PTX ISA versions."""
+ result = subprocess.run([ptxas, "--list-version"], capture_output=True, text=True, check=True)
+ versions = []
+ for line in result.stdout.splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ match = re.match(r"(\d+)\.(\d+)", line)
+ if match:
+ versions.append((int(match.group(1)), int(match.group(2))))
+ return versions
+
+
+def ptxas_supported_sms(ptxas_executable):
+ """Extract supported SM architectures from ptxas help output."""
+ result = subprocess.run([ptxas_executable, "--help"], capture_output=True, text=True, check=True)
+ supported_sms = re.findall(r"'sm_(\d+(?:[af]?))'", result.stdout)
+ if not supported_sms:
+ raise RuntimeError("No SM architecture values found in ptxas help output")
+ return supported_sms
-# Enable %ptxas, %ptxas-verify, and %ptxas-legacy-verify tools.
def enable_ptxas(ptxas_executable):
- version = ptxas_version(ptxas_executable)
- if version:
- # ptxas is supposed to be backward compatible with previous
- # versions, so add a feature for every known version prior to
- # the current one.
- ptxas_known_versions = [
- (9, 0),
- (9, 1),
- (9, 2),
- (10, 0),
- (10, 1),
- (10, 2),
- (11, 0),
- (11, 1),
- (11, 2),
- (11, 3),
- (11, 4),
- (11, 5),
- (11, 6),
- (11, 7),
- (11, 8),
- (12, 0),
- (12, 1),
- (12, 2),
- (12, 3),
- (12, 4),
- (12, 5),
- (12, 6),
- (12, 8),
- (12, 9),
- (13, 0),
- (13, 1),
- ]
-
- def version_int(ver):
- return ver[0] * 100 + ver[1]
-
- # ignore ptxas if its version is below the minimum supported
- # version
- min_version = ptxas_known_versions[0]
- if version_int(version) < version_int(min_version):
- print(
- "Warning: ptxas version {}.{} is not supported".format(
- version[0], version[1]
- )
- )
- return
-
- for known_version in ptxas_known_versions:
- if version_int(known_version) <= version_int(version):
- major, minor = known_version
- if version_int(version) >= version_int((13, 0)):
- config.available_features.add("ptxas-{}.{}".format(major, minor))
- else:
- config.available_features.add("ptxas-legacy-{}.{}".format(major, minor))
- break
-
- # By default, compile the PTX for the latest supported architecture.
- # You can override the default by specifying a different arch
- # e.g. %ptxas-verify -arch=sm_75.
- # if version_int(version) >= version_int((13, 0)):
- # ptxas_default_arch = 100
- # elif version_int(version) >= version_int((12, 0)):
- # ptxas_default_arch = 90
- if version_int(version) >= version_int((10, 0)):
- ptxas_default_arch = 75
- else:
- ptxas_default_arch = 60
+ config.available_features.add("ptxas")
+ tools.append(ToolSubst("%ptxas", ptxas_executable))
- if version_int(version) >= version_int((13, 0)):
- ptxas_verify_tool = "%ptxas-verify"
- config.available_features.add("ptxas")
- else:
- ptxas_verify_tool = "%ptxas-legacy-verify"
- config.available_features.add("ptxas-legacy")
-
- tools.extend(
- [
- ToolSubst("%ptxas", ptxas_executable),
- ToolSubst(
- ptxas_verify_tool,
- "{} -arch=sm_{} -c -".format(ptxas_executable, ptxas_default_arch),
- ),
- ]
- )
+ major_version, minor_version = ptxas_version(ptxas_executable)
+ config.available_features.add("ptxas-{}.{}".format(major_version, minor_version))
+
+ for major_version, minor_version in ptxas_supported_isa_versions(ptxas_executable):
+ config.available_features.add("ptxas-isa-v{}-{}".format(major_version, minor_version))
+
+ for sm in ptxas_supported_sms(ptxas):
+ config.available_features.add("ptxas-sm-{}".format(sm))
ptxas_executable = (
>From 55fdd9a5a0c443afd1c64921e012adc68ea76934 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <justinfargnoli at gmail.com>
Date: Tue, 19 Aug 2025 16:32:05 -0700
Subject: [PATCH 5/7] Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
llvm/test/lit.cfg.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 540e7b945e713..3b1b0a693b1fb 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -335,7 +335,7 @@ def enable_ptxas(ptxas_executable):
for major_version, minor_version in ptxas_supported_isa_versions(ptxas_executable):
config.available_features.add("ptxas-isa-v{}-{}".format(major_version, minor_version))
- for sm in ptxas_supported_sms(ptxas):
+ for sm in ptxas_supported_sms(ptxas_executable):
config.available_features.add("ptxas-sm-{}".format(sm))
>From ba1afdbebafb637715138610991708581c9ee6cd Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 20 Aug 2025 00:22:08 +0000
Subject: [PATCH 6/7] Update tests
---
llvm/test/CodeGen/NVPTX/access-non-generic.ll | 8 +++---
llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll | 2 +-
llvm/test/CodeGen/NVPTX/atomics-sm60.ll | 4 +--
llvm/test/CodeGen/NVPTX/atomics-sm70.ll | 6 ++---
llvm/test/CodeGen/NVPTX/atomics-with-scope.ll | 4 +--
llvm/test/CodeGen/NVPTX/b52037.ll | 2 +-
llvm/test/CodeGen/NVPTX/bmsk.ll | 2 +-
.../test/CodeGen/NVPTX/byval-arg-vectorize.ll | 2 +-
llvm/test/CodeGen/NVPTX/byval-const-global.ll | 2 +-
llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll | 2 +-
llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll | 2 +-
llvm/test/CodeGen/NVPTX/cmpxchg.ll | 2 +-
llvm/test/CodeGen/NVPTX/f16-abs.ll | 8 +++---
llvm/test/CodeGen/NVPTX/f16-instructions.ll | 8 +++---
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 6 ++---
llvm/test/CodeGen/NVPTX/fence-nocluster.ll | 6 ++---
llvm/test/CodeGen/NVPTX/i16x2-instructions.ll | 4 +--
llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 2 +-
llvm/test/CodeGen/NVPTX/match.ll | 2 +-
.../CodeGen/NVPTX/math-intrins-sm53-ptx42.ll | 2 +-
llvm/test/CodeGen/NVPTX/nanosleep.ll | 2 +-
llvm/test/CodeGen/NVPTX/pr126337.ll | 2 +-
llvm/test/CodeGen/NVPTX/szext.ll | 2 +-
llvm/test/CodeGen/NVPTX/trunc-setcc.ll | 2 +-
llvm/test/CodeGen/NVPTX/trunc-tofp.ll | 2 +-
llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py | 2 +-
llvm/test/lit.cfg.py | 25 +++++++++++++------
27 files changed, 62 insertions(+), 51 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index 601a35288f54d..a723ab1cae20c 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix PTX
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
+; RUN: llc < %s -mtriple=nvptx | FileCheck %s --check-prefix PTX
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s --check-prefix PTX
; RUN: opt -mtriple=nvptx-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
; RUN: opt -mtriple=nvptx64-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
@scalar = internal addrspace(3) global float 0.000000e+00, align 4
diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
index 94b3f0a2e1c3e..88fae7a3f78a0 100644
--- a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefixes=ALL,SM30
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=ALL,SM60
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; CHECK-LABEL: fadd_double
define void @fadd_double(ptr %0, double %1) {
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
index 2e11323d1b3e1..d90dcaf3400b0 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; CHECK-LABEL: .func test(
define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, double %d) {
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index f710d7f883a1b..82d652fb26588 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -2,9 +2,9 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK64
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | FileCheck %s --check-prefixes=CHECKPTX62
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
index e6636d706b49d..94c416f5791b2 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; CHECK-LABEL: .func test_atomics_scope(
define void @test_atomics_scope(ptr %fp, float %f,
diff --git a/llvm/test/CodeGen/NVPTX/b52037.ll b/llvm/test/CodeGen/NVPTX/b52037.ll
index b6317dfb28597..268a8972ebd22 100644
--- a/llvm/test/CodeGen/NVPTX/b52037.ll
+++ b/llvm/test/CodeGen/NVPTX/b52037.ll
@@ -4,7 +4,7 @@
; https://bugs.llvm.org/show_bug.cgi?id=52037 for the gory details.
;
; RUN: llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | FileCheck %s
-; RUN: %if ptxas %{ llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | %ptxas-verify -arch=sm_70 %}
; CHECK-LABEL: .visible .entry barney(
; CHECK-NOT: .local{{.*}}__local_depot
diff --git a/llvm/test/CodeGen/NVPTX/bmsk.ll b/llvm/test/CodeGen/NVPTX/bmsk.ll
index d5b278657bd52..530a78cb14102 100644
--- a/llvm/test/CodeGen/NVPTX/bmsk.ll
+++ b/llvm/test/CodeGen/NVPTX/bmsk.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -o - < %s -mcpu=sm_70 -mattr=+ptx76 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
target triple = "nvptx64-unknown-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
index 9988d5b122cc1..e7f71f4ad52ea 100644
--- a/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_70 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
index b4934e1a94d1b..81e7edfd8602e 100644
--- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_70 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
index 63c389c36e87e..3e784440dbdb6 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | FileCheck %s --check-prefix=SM60
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-LABEL: monotonic_monotonic_i8_global_cta(
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
index 5cb344d5ded84..8b435c28c558f 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefix=SM70
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-LABEL: monotonic_monotonic_i8_global_cta(
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
index 237e42394ba2f..f8ec071bd0a1f 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
; TODO: these are system scope, but are compiled to gpu scope..
; TODO: these are seq_cst, but are compiled to relaxed..
diff --git a/llvm/test/CodeGen/NVPTX/f16-abs.ll b/llvm/test/CodeGen/NVPTX/f16-abs.ll
index 4025b38c0f0e4..f5354a33a2c7a 100644
--- a/llvm/test/CodeGen/NVPTX/f16-abs.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-abs.ll
@@ -4,7 +4,7 @@
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx60 \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 %{ \
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx60 \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_53 \
@@ -14,7 +14,7 @@
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 %{ \
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_53 \
@@ -24,7 +24,7 @@
; RUN: llc < %s -mcpu=sm_52 -mattr=+ptx65 \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_52 %{ \
; RUN: llc < %s -mcpu=sm_52 -mattr=+ptx65 \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_52 \
@@ -34,7 +34,7 @@
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | FileCheck -check-prefix CHECK-F16-ABS %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 %{ \
; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 \
; RUN: -O0 -disable-post-ra -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_53 \
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index d4aec4f16f1ab..dd6839bdd440a 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -3,7 +3,7 @@
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-F16-NOFTZ %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 && ptxas-isa-v6.0 %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -mattr=+ptx60 \
@@ -14,7 +14,7 @@
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -denormal-fp-math-f32=preserve-sign -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16-FTZ %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 && ptxas-isa-v6.0 %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: -denormal-fp-math-f32=preserve-sign -mattr=+ptx60 \
@@ -25,7 +25,7 @@
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
; RUN: -verify-machineinstrs -mattr=+ptx60 \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
; RUN: | %ptxas-verify -arch=sm_53 \
@@ -34,7 +34,7 @@
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_52 %{ \
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_52 \
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 991311f9492b9..368f9fe14d026 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_53 \
@@ -13,7 +13,7 @@
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
; RUN: -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
; RUN: -verify-machineinstrs \
@@ -23,7 +23,7 @@
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_52 %{ \
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_52 \
diff --git a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
index 1c6c1744b5375..ffffe96f0e63f 100644
--- a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
+++ b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
+; RUN: %if ptxas-sm_35 && !ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90
-; RUN: %if ptxas-12.8 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
define void @fence_acquire_sys() {
; SM30-LABEL: fence_acquire_sys(
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index 74136bbe478c9..c5bb4af954427 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,I16x2 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_90 %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_90 \
@@ -12,7 +12,7 @@
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,NO-I16x2 %s
-; RUN: %if ptxas %{ \
+; RUN: %if ptxas-sm_53 %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_53 \
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index a56b85de80143..b66b843f4b838 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
%struct.Large = type { [16 x double] }
diff --git a/llvm/test/CodeGen/NVPTX/match.ll b/llvm/test/CodeGen/NVPTX/match.ll
index ae01b0d3cc7e0..b2200ab6e1f3f 100644
--- a/llvm/test/CodeGen/NVPTX/match.ll
+++ b/llvm/test/CodeGen/NVPTX/match.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
declare i32 @llvm.nvvm.match.any.sync.i32(i32, i32)
declare i32 @llvm.nvvm.match.any.sync.i64(i32, i64)
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll b/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll
index 236bf67f81821..462334d9bc6e7 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | %ptxas-verify -arch=sm_53 %}
+; RUN: %if ptxas-sm_53 && ptxas-isa-v4.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | %ptxas-verify -arch=sm_53 %}
declare half @llvm.nvvm.fma.rn.f16(half, half, half)
declare half @llvm.nvvm.fma.rn.ftz.f16(half, half, half)
diff --git a/llvm/test/CodeGen/NVPTX/nanosleep.ll b/llvm/test/CodeGen/NVPTX/nanosleep.ll
index de08c9fbdf417..148ccd72483db 100644
--- a/llvm/test/CodeGen/NVPTX/nanosleep.ll
+++ b/llvm/test/CodeGen/NVPTX/nanosleep.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=nvptx64 -O2 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-isa-v6.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
declare void @llvm.nvvm.nanosleep(i32)
diff --git a/llvm/test/CodeGen/NVPTX/pr126337.ll b/llvm/test/CodeGen/NVPTX/pr126337.ll
index 95258f7a3f360..6caef744373f3 100644
--- a/llvm/test/CodeGen/NVPTX/pr126337.ll
+++ b/llvm/test/CodeGen/NVPTX/pr126337.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas -arch=sm_70 -c - %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas -arch=sm_70 -c - %}
; This IR should compile without triggering assertions in LICM
; when the CopyToReg from %0 in the first BB gets eliminated
diff --git a/llvm/test/CodeGen/NVPTX/szext.ll b/llvm/test/CodeGen/NVPTX/szext.ll
index 5a4fe4ed7fc0b..6a30396215548 100644
--- a/llvm/test/CodeGen/NVPTX/szext.ll
+++ b/llvm/test/CodeGen/NVPTX/szext.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -o - < %s -mcpu=sm_70 -mattr=+ptx76 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-isa-v7.6 %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
target triple = "nvptx64-unknown-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/trunc-setcc.ll b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
index f22e37e203966..f6a1c6bb60d6d 100644
--- a/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
+; RUN: %if ptxas-sm_50 %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/trunc-tofp.ll b/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
index 12502b6f29899..99a1e8a0630a8 100644
--- a/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
+++ b/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
+; RUN: %if ptxas-sm_50 %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py b/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py
index bc441bfa8180f..06094fcafc1cc 100644
--- a/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py
+++ b/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py
@@ -6,7 +6,7 @@
# RUN: --check-prefixes=INTRINSICS,NOEXTGEOM,NOINT,NOSUBINT,NOMMA,NODOUBLE,NOALTFLOAT,NOLDMATRIX
# RUN: llc < %t-ptx60-sm_70.ll -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 \
# RUN: | FileCheck %t-ptx60-sm_70.ll
-# RUN: %if ptxas %{ \
+# RUN: %if ptxas-sm_70 && ptxas-isa-v6.0 %{ \
# RUN: llc < %t-ptx60-sm_70.ll -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 \
# RUN: | %ptxas-verify -arch=sm_70 \
# RUN: %}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 3b1b0a693b1fb..a0c48681d72f2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -303,8 +303,12 @@ def ptxas_version(ptxas):
def ptxas_supported_isa_versions(ptxas):
- """Return list of (major, minor) tuples for supported PTX ISA versions."""
- result = subprocess.run([ptxas, "--list-version"], capture_output=True, text=True, check=True)
+ result = subprocess.run(
+ [ptxas, "--list-version"],
+ capture_output=True,
+ text=True,
+ check=True,
+ )
versions = []
for line in result.stdout.splitlines():
line = line.strip()
@@ -317,8 +321,12 @@ def ptxas_supported_isa_versions(ptxas):
def ptxas_supported_sms(ptxas_executable):
- """Extract supported SM architectures from ptxas help output."""
- result = subprocess.run([ptxas_executable, "--help"], capture_output=True, text=True, check=True)
+ result = subprocess.run(
+ [ptxas_executable, "--help"],
+ capture_output=True,
+ text=True,
+ check=True,
+ )
supported_sms = re.findall(r"'sm_(\d+(?:[af]?))'", result.stdout)
if not supported_sms:
raise RuntimeError("No SM architecture values found in ptxas help output")
@@ -327,16 +335,19 @@ def ptxas_supported_sms(ptxas_executable):
def enable_ptxas(ptxas_executable):
config.available_features.add("ptxas")
- tools.append(ToolSubst("%ptxas", ptxas_executable))
+ tools.extend([
+ ToolSubst("%ptxas", ptxas_executable),
+ ToolSubst("%ptxas-verify", f"{ptxas_executable} -c -"),
+ ])
major_version, minor_version = ptxas_version(ptxas_executable)
config.available_features.add("ptxas-{}.{}".format(major_version, minor_version))
for major_version, minor_version in ptxas_supported_isa_versions(ptxas_executable):
- config.available_features.add("ptxas-isa-v{}-{}".format(major_version, minor_version))
+ config.available_features.add("ptxas-isa-v{}.{}".format(major_version, minor_version))
for sm in ptxas_supported_sms(ptxas_executable):
- config.available_features.add("ptxas-sm-{}".format(sm))
+ config.available_features.add("ptxas-sm_{}".format(sm))
ptxas_executable = (
>From cde24e36b62f54791746f0d67eec514fac473386 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 20 Aug 2025 21:47:17 +0000
Subject: [PATCH 7/7] Explicitly check for 32 bit address size support
---
llvm/test/CodeGen/NVPTX/access-non-generic.ll | 2 +-
llvm/test/CodeGen/NVPTX/addrspacecast.ll | 2 +-
llvm/test/CodeGen/NVPTX/annotations.ll | 2 +-
llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll | 2 +-
llvm/test/CodeGen/NVPTX/arithmetic-int.ll | 2 +-
llvm/test/CodeGen/NVPTX/atomics-sm60.ll | 2 +-
llvm/test/CodeGen/NVPTX/atomics-sm70.ll | 4 ++--
llvm/test/CodeGen/NVPTX/atomics-sm90.ll | 4 ++--
llvm/test/CodeGen/NVPTX/atomics-with-scope.ll | 2 +-
llvm/test/CodeGen/NVPTX/calling-conv.ll | 2 +-
llvm/test/CodeGen/NVPTX/combine-mad.ll | 2 +-
llvm/test/CodeGen/NVPTX/compare-int.ll | 2 +-
llvm/test/CodeGen/NVPTX/convert-fp.ll | 2 +-
llvm/test/CodeGen/NVPTX/convert-int-sm20.ll | 2 +-
llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll | 2 +-
llvm/test/CodeGen/NVPTX/fence-nocluster.ll | 2 +-
llvm/test/CodeGen/NVPTX/fma-disable.ll | 4 ++--
llvm/test/CodeGen/NVPTX/global-addrspace.ll | 2 +-
llvm/test/CodeGen/NVPTX/global-ordering.ll | 2 +-
llvm/test/CodeGen/NVPTX/idioms.ll | 2 +-
llvm/test/CodeGen/NVPTX/intrinsic-old.ll | 2 +-
llvm/test/CodeGen/NVPTX/intrinsics.ll | 2 +-
llvm/test/CodeGen/NVPTX/ld-addrspace.ll | 2 +-
llvm/test/CodeGen/NVPTX/ld-generic.ll | 2 +-
llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py | 2 +-
llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 2 +-
llvm/test/CodeGen/NVPTX/mbarrier.ll | 2 +-
llvm/test/CodeGen/NVPTX/nofunc.ll | 2 +-
llvm/test/CodeGen/NVPTX/packed-aggr.ll | 2 +-
llvm/test/CodeGen/NVPTX/param-overalign.ll | 2 +-
llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll | 2 +-
llvm/test/CodeGen/NVPTX/reg-types.ll | 2 +-
llvm/test/CodeGen/NVPTX/short-ptr.ll | 2 +-
llvm/test/CodeGen/NVPTX/simple-call.ll | 2 +-
llvm/test/CodeGen/NVPTX/st-addrspace.ll | 2 +-
llvm/test/CodeGen/NVPTX/st-generic.ll | 2 +-
llvm/test/CodeGen/NVPTX/st-param-imm.ll | 2 +-
llvm/test/CodeGen/NVPTX/symbol-naming.ll | 2 +-
llvm/test/CodeGen/NVPTX/unreachable.ll | 2 +-
llvm/test/CodeGen/NVPTX/vaargs.ll | 2 +-
llvm/test/CodeGen/NVPTX/vector-compare.ll | 2 +-
llvm/test/CodeGen/NVPTX/vector-select.ll | 2 +-
llvm/test/lit.cfg.py | 17 +++++++++++++++++
43 files changed, 62 insertions(+), 45 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index a723ab1cae20c..ff9a07b8d760e 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s --check-prefix PTX
; RUN: opt -mtriple=nvptx-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
; RUN: opt -mtriple=nvptx64-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast.ll b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
index 86008a1b70058..546d22ca0e691 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
@@ -1,7 +1,7 @@
; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
diff --git a/llvm/test/CodeGen/NVPTX/annotations.ll b/llvm/test/CodeGen/NVPTX/annotations.ll
index 5360e8988777b..e4aa0552e8420 100644
--- a/llvm/test/CodeGen/NVPTX/annotations.ll
+++ b/llvm/test/CodeGen/NVPTX/annotations.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
@texture = internal addrspace(1) global i64 0, align 8
diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
index ce71d3a78c0de..e88d0396f0858 100644
--- a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
+++ b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
;; These tests should run for all targets
diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-int.ll b/llvm/test/CodeGen/NVPTX/arithmetic-int.ll
index 1fbfd0a987d7a..9e41e9e240902 100644
--- a/llvm/test/CodeGen/NVPTX/arithmetic-int.ll
+++ b/llvm/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
;; These tests should run for all targets
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
index d90dcaf3400b0..5a7a1823cb2a0 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; CHECK-LABEL: .func test(
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index 82d652fb26588..abb0539f69c15 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -2,9 +2,9 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK64
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | FileCheck %s --check-prefixes=CHECKPTX62
-; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index f96fd30019025..62f60dc2a2631 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -2,9 +2,9 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s --check-prefixes=CHECK64
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_86 -mattr=+ptx71 | FileCheck %s --check-prefixes=CHECKPTX71
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_86 -mattr=+ptx71 | %ptxas-verify -arch=sm_86 %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_86 -mattr=+ptx71 | %ptxas-verify -arch=sm_86 %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
index 94c416f5791b2..9e30519b31cc3 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
; CHECK-LABEL: .func test_atomics_scope(
diff --git a/llvm/test/CodeGen/NVPTX/calling-conv.ll b/llvm/test/CodeGen/NVPTX/calling-conv.ll
index 74b99efcdadf7..be0a1e714bd50 100644
--- a/llvm/test/CodeGen/NVPTX/calling-conv.ll
+++ b/llvm/test/CodeGen/NVPTX/calling-conv.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll
index da303b7c38eb7..d39ab9e89d6a4 100644
--- a/llvm/test/CodeGen/NVPTX/combine-mad.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | %ptxas-verify %}
define i32 @test1(i32 %n, i32 %m) {
diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll
index 9338172d024ce..ccc8cff7be4b3 100644
--- a/llvm/test/CodeGen/NVPTX/compare-int.ll
+++ b/llvm/test/CodeGen/NVPTX/compare-int.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
;; These tests should run for all targets
diff --git a/llvm/test/CodeGen/NVPTX/convert-fp.ll b/llvm/test/CodeGen/NVPTX/convert-fp.ll
index debaadedce09a..4b33cd66ce8f0 100644
--- a/llvm/test/CodeGen/NVPTX/convert-fp.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-fp.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
define i16 @cvt_u16_f32(float %x) {
diff --git a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
index a2fc8da3f1e61..2f3da349a3e05 100644
--- a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
;; Integer conversions happen inplicitly by loading/storing the proper types
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index 06fb8d2c7c54d..ac4dad50e1d21 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -4,7 +4,7 @@
; RUN: llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-32
; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52.
diff --git a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
index ffffe96f0e63f..3c7b0fbece97a 100644
--- a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
+++ b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30
-; RUN: %if ptxas-sm_35 && !ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
+; RUN: %if ptxas-sm_35 && ptxas-32 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70
; RUN: %if ptxas-sm_70 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90
diff --git a/llvm/test/CodeGen/NVPTX/fma-disable.ll b/llvm/test/CodeGen/NVPTX/fma-disable.ll
index 0038b4b65e0f9..344fae6673f81 100644
--- a/llvm/test/CodeGen/NVPTX/fma-disable.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-disable.ll
@@ -2,8 +2,8 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
diff --git a/llvm/test/CodeGen/NVPTX/global-addrspace.ll b/llvm/test/CodeGen/NVPTX/global-addrspace.ll
index 3f9d321ab4406..7cfde62bd3f18 100644
--- a/llvm/test/CodeGen/NVPTX/global-addrspace.ll
+++ b/llvm/test/CodeGen/NVPTX/global-addrspace.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; PTX32: .visible .global .align 4 .u32 i;
diff --git a/llvm/test/CodeGen/NVPTX/global-ordering.ll b/llvm/test/CodeGen/NVPTX/global-ordering.ll
index 2815cff7d7b41..145d21c712eb6 100644
--- a/llvm/test/CodeGen/NVPTX/global-ordering.ll
+++ b/llvm/test/CodeGen/NVPTX/global-ordering.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; Make sure we emit these globals in def-use order
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index a3bf8922a98f4..a3df0995fb635 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
%struct.S16 = type { i16, i16 }
diff --git a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
index f595df837f91f..1449e7fbd4186 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
define ptx_device i32 @test_tid_x() {
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 4ed50632251cb..872c921eb7351 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
define float @test_fabsf(float %f) {
diff --git a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
index 24071b48143f2..590a7099ec7ee 100644
--- a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
diff --git a/llvm/test/CodeGen/NVPTX/ld-generic.ll b/llvm/test/CodeGen/NVPTX/ld-generic.ll
index ee304ca1601f4..de1ee13c91686 100644
--- a/llvm/test/CodeGen/NVPTX/ld-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/ld-generic.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
index 2fa4c89f4d71c..b8d53ae0e5ee5 100644
--- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
+++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
@@ -4,7 +4,7 @@
# RUN: %python %s > %t.ll
# RUN: llc < %t.ll -mtriple=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll
# RUN: llc < %t.ll -mtriple=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
-# RUN: %if ptxas && !ptxas-12.0 %{ llc < %t.ll -mtriple=nvptx -mcpu=sm_30 | %ptxas-verify %}
+# RUN: %if ptxas-32 %{ llc < %t.ll -mtriple=nvptx -mcpu=sm_30 | %ptxas-verify %}
# RUN: %if ptxas %{ llc < %t.ll -mtriple=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
from __future__ import print_function
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index ae069cf956c36..dbb93a8a74a8c 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; Ensure we access the local stack properly
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier.ll b/llvm/test/CodeGen/NVPTX/mbarrier.ll
index 87a73aa4d4e2c..19ed927488298 100644
--- a/llvm/test/CodeGen/NVPTX/mbarrier.ll
+++ b/llvm/test/CodeGen/NVPTX/mbarrier.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX64
-; RUN: %if ptxas-11.0 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
+; RUN: %if ptxas-11.0 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
declare void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
diff --git a/llvm/test/CodeGen/NVPTX/nofunc.ll b/llvm/test/CodeGen/NVPTX/nofunc.ll
index a8ce20ed91dc4..0708df9445dc7 100644
--- a/llvm/test/CodeGen/NVPTX/nofunc.ll
+++ b/llvm/test/CodeGen/NVPTX/nofunc.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; Test that we don't crash if we're compiling a module with function references,
diff --git a/llvm/test/CodeGen/NVPTX/packed-aggr.ll b/llvm/test/CodeGen/NVPTX/packed-aggr.ll
index 602bef299bb21..230094654580b 100644
--- a/llvm/test/CodeGen/NVPTX/packed-aggr.ll
+++ b/llvm/test/CodeGen/NVPTX/packed-aggr.ll
@@ -5,7 +5,7 @@
; RUN: FileCheck %s --check-prefixes=CHECK,CHECK32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | \
; RUN: FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: %if ptxas-11.1 && !ptxas-12.0%{ llc < %s -mtriple=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
+; RUN: %if ptxas-11.1 && ptxas-32%{ llc < %s -mtriple=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
; RUN: %if ptxas-11.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
;; Test that packed structs with symbol references are represented using the
diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll
index 2155fb4031c36..04d0ad00ef9db 100644
--- a/llvm/test/CodeGen/NVPTX/param-overalign.ll
+++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
index cd2505c20d39c..76b262930095c 100644
--- a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
define ptx_kernel void @t1(ptr %a) {
diff --git a/llvm/test/CodeGen/NVPTX/reg-types.ll b/llvm/test/CodeGen/NVPTX/reg-types.ll
index ea45bfdc5e190..1f2fdb642fdb3 100644
--- a/llvm/test/CodeGen/NVPTX/reg-types.ll
+++ b/llvm/test/CodeGen/NVPTX/reg-types.ll
@@ -3,7 +3,7 @@
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=NO8BIT
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=NO8BIT
-; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; CHECK-LABEL: .visible .func func(
diff --git a/llvm/test/CodeGen/NVPTX/short-ptr.ll b/llvm/test/CodeGen/NVPTX/short-ptr.ll
index eb058955e0aa1..8510d44b0613d 100644
--- a/llvm/test/CodeGen/NVPTX/short-ptr.ll
+++ b/llvm/test/CodeGen/NVPTX/short-ptr.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix CHECK-DEFAULT-32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | FileCheck %s --check-prefixes CHECK-SHORT-SHARED,CHECK-SHORT-CONST,CHECK-SHORT-LOCAL
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | %ptxas-verify %}
diff --git a/llvm/test/CodeGen/NVPTX/simple-call.ll b/llvm/test/CodeGen/NVPTX/simple-call.ll
index 991ae04b91b67..3d39f1246d2f1 100644
--- a/llvm/test/CodeGen/NVPTX/simple-call.ll
+++ b/llvm/test/CodeGen/NVPTX/simple-call.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; CHECK: .func ({{.*}}) device_func
diff --git a/llvm/test/CodeGen/NVPTX/st-addrspace.ll b/llvm/test/CodeGen/NVPTX/st-addrspace.ll
index 1e0e75a041c14..a7fc48543bdc5 100644
--- a/llvm/test/CodeGen/NVPTX/st-addrspace.ll
+++ b/llvm/test/CodeGen/NVPTX/st-addrspace.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
diff --git a/llvm/test/CodeGen/NVPTX/st-generic.ll b/llvm/test/CodeGen/NVPTX/st-generic.ll
index 950da93f95217..31b26acd84d16 100644
--- a/llvm/test/CodeGen/NVPTX/st-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/st-generic.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
;; i8
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index f90435abefbb5..41b342bc88789 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -verify-machineinstrs | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/symbol-naming.ll b/llvm/test/CodeGen/NVPTX/symbol-naming.ll
index 941378f120c32..8943cd7501075 100644
--- a/llvm/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/llvm/test/CodeGen/NVPTX/symbol-naming.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
; Verify that the NVPTX target removes invalid symbol names prior to emitting
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 618c7ed0c4997..f59a4686604e8 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -13,7 +13,7 @@
; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
; RUN: | FileCheck %s --check-prefixes=BUG-FIXED
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
target triple = "nvptx-unknown-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index 9e312a2fec60a..c25ff362c3369 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK32
; RUN: llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
; CHECK: .address_size [[BITS:32|64]]
diff --git a/llvm/test/CodeGen/NVPTX/vector-compare.ll b/llvm/test/CodeGen/NVPTX/vector-compare.ll
index 0e63ee96932d9..3b634bbd01ef7 100644
--- a/llvm/test/CodeGen/NVPTX/vector-compare.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-compare.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify -m32 %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify -m32 %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; This test makes sure that the result of vector compares are properly
diff --git a/llvm/test/CodeGen/NVPTX/vector-select.ll b/llvm/test/CodeGen/NVPTX/vector-select.ll
index 569da5e6628b0..70f77a921851a 100644
--- a/llvm/test/CodeGen/NVPTX/vector-select.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-select.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; This test makes sure that vector selects are scalarized by the type legalizer.
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index a0c48681d72f2..7e08a33ecbd3c 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -333,6 +333,20 @@ def ptxas_supported_sms(ptxas_executable):
return supported_sms
+def ptxas_supports_address_size_32(ptxas_executable):
+ result = subprocess.run(
+ [ptxas_executable, "-m 32"],
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ if "is not defined for option 'machine'" in result.stderr:
+ return False
+ if "Missing .version directive at start of file" in result.stderr:
+ return True
+ raise RuntimeError("Unexpected ptxas output: {}".format(result.stderr))
+
+
def enable_ptxas(ptxas_executable):
config.available_features.add("ptxas")
tools.extend([
@@ -349,6 +363,9 @@ def enable_ptxas(ptxas_executable):
for sm in ptxas_supported_sms(ptxas_executable):
config.available_features.add("ptxas-sm_{}".format(sm))
+ if ptxas_supports_address_size_32(ptxas_executable):
+ config.available_features.add("ptxas-32")
+
ptxas_executable = (
os.environ.get("LLVM_PTXAS_EXECUTABLE", None) or config.ptxas_executable
More information about the llvm-commits
mailing list