[llvm] [lit] Refactor `ptxas` usage (PR #154439)

Wed Aug 20 14:47:30 PDT 2025

https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/154439

>From b2e784e0ed0f7d5ca24596eb2345a8992bc1313d Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Mon, 18 Aug 2025 23:31:55 +0000
Subject: [PATCH 1/7] Initial commit

---
 llvm/test/lit.cfg.py | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 8c2d1a454e8f9..74a790bf370f3 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -306,9 +306,7 @@ def ptxas_version(ptxas):
     return None
 
 
-# Enable %ptxas and %ptxas-verify tools.
-# %ptxas-verify defaults to sm_60 architecture. It can be overriden
-# by specifying required one, for instance: %ptxas-verify -arch=sm_80.
+# Enable %ptxas, %ptxas-verify, and %ptxas-legacy-verify tools.
 def enable_ptxas(ptxas_executable):
     version = ptxas_version(ptxas_executable)
     if version:
@@ -339,6 +337,9 @@ def enable_ptxas(ptxas_executable):
             (12, 5),
             (12, 6),
             (12, 8),
+            (12, 9),
+            (13, 0),
+            (13, 1),
         ]
 
         def version_int(ver):
@@ -360,11 +361,31 @@ def version_int(ver):
                 major, minor = known_version
                 config.available_features.add("ptxas-{}.{}".format(major, minor))
 
+    # By default, compile the PTX for the latest supported architecture.
+    # You can override the default by specifying a different arch
+    # e.g. %ptxas-verify -arch=sm_75.
+    if version_int(version) >= version_int((13, 0)):
+        ptxas_default_arch = 100
+    elif version_int(version) >= version_int((12, 0)):
+        ptxas_default_arch = 90
+    elif version_int(version) >= version_int((11, 0)):
+        ptxas_default_arch = 80
+    else:
+        ptxas_default_arch = 70
+
+    if version_int(version) >= version_int((13, 0)):
+        ptxas_verify_tool = "%ptxas-verify"
+    else:
+        ptxas_verify_tool = "%ptxas-legacy-verify"
+
     config.available_features.add("ptxas")
     tools.extend(
         [
             ToolSubst("%ptxas", ptxas_executable),
-            ToolSubst("%ptxas-verify", "{} -arch=sm_60 -c -".format(ptxas_executable)),
+            ToolSubst(
+                ptxas_verify_tool,
+                "{} -arch=sm_{} -c -".format(ptxas_executable, ptxas_default_arch),
+            ),
         ]
     )
 

>From 2554a3e969dfb3d601bc97ae156ee479c991d589 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Mon, 18 Aug 2025 23:42:45 +0000
Subject: [PATCH 2/7] Removing CUDA 13.1

---
 llvm/test/lit.cfg.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 74a790bf370f3..97690aef809ad 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -339,7 +339,6 @@ def enable_ptxas(ptxas_executable):
             (12, 8),
             (12, 9),
             (13, 0),
-            (13, 1),
         ]
 
         def version_int(ver):

>From 3367dec999de47b7d9dd727865569cd9f4b8ac98 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 19 Aug 2025 21:03:14 +0000
Subject: [PATCH 3/7] Save work

---
 llvm/test/lit.cfg.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 97690aef809ad..c9d6ca443eca5 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -339,6 +339,7 @@ def enable_ptxas(ptxas_executable):
             (12, 8),
             (12, 9),
             (13, 0),
+            (13, 1),
         ]
 
         def version_int(ver):
@@ -358,26 +359,31 @@ def version_int(ver):
         for known_version in ptxas_known_versions:
             if version_int(known_version) <= version_int(version):
                 major, minor = known_version
-                config.available_features.add("ptxas-{}.{}".format(major, minor))
+                if version_int(version) >= version_int((13, 0)):
+                    config.available_features.add("ptxas-{}.{}".format(major, minor))
+                else:
+                    config.available_features.add("ptxas-legacy-{}.{}".format(major, minor))
+                break
 
     # By default, compile the PTX for the latest supported architecture.
     # You can override the default by specifying a different arch
     # e.g. %ptxas-verify -arch=sm_75.
-    if version_int(version) >= version_int((13, 0)):
-        ptxas_default_arch = 100
-    elif version_int(version) >= version_int((12, 0)):
-        ptxas_default_arch = 90
-    elif version_int(version) >= version_int((11, 0)):
-        ptxas_default_arch = 80
+    # if version_int(version) >= version_int((13, 0)):
+    #     ptxas_default_arch = 100
+    # elif version_int(version) >= version_int((12, 0)):
+    #     ptxas_default_arch = 90
+    if version_int(version) >= version_int((10, 0)):
+        ptxas_default_arch = 75
     else:
-        ptxas_default_arch = 70
+        ptxas_default_arch = 60
 
     if version_int(version) >= version_int((13, 0)):
         ptxas_verify_tool = "%ptxas-verify"
+        config.available_features.add("ptxas")
     else:
         ptxas_verify_tool = "%ptxas-legacy-verify"
+        config.available_features.add("ptxas-legacy")
 
-    config.available_features.add("ptxas")
     tools.extend(
         [
             ToolSubst("%ptxas", ptxas_executable),

>From 2948cdd4fc02cd66d82a60f79bcaacb1bc423348 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 19 Aug 2025 23:28:16 +0000
Subject: [PATCH 4/7] Refactor around extracting info from ptxas

---
 llvm/test/lit.cfg.py | 132 +++++++++++++------------------------------
 1 file changed, 38 insertions(+), 94 deletions(-)

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index c9d6ca443eca5..540e7b945e713 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -294,105 +294,49 @@ def get_asan_rtlib():
 )
 
 
-# Find (major, minor) version of ptxas
 def ptxas_version(ptxas):
-    ptxas_cmd = subprocess.Popen([ptxas, "--version"], stdout=subprocess.PIPE)
-    ptxas_out = ptxas_cmd.stdout.read().decode("ascii")
-    ptxas_cmd.wait()
-    match = re.search(r"release (\d+)\.(\d+)", ptxas_out)
-    if match:
-        return (int(match.group(1)), int(match.group(2)))
-    print("couldn't determine ptxas version")
-    return None
+    output = subprocess.check_output([ptxas, "--version"], text=True)
+    match = re.search(r"release (\d+)\.(\d+)", output)
+    if not match:
+        raise RuntimeError("Couldn't determine ptxas version")
+    return int(match.group(1)), int(match.group(2))
+
+
+def ptxas_supported_isa_versions(ptxas):
+    """Return list of (major, minor) tuples for supported PTX ISA versions."""
+    result = subprocess.run([ptxas, "--list-version"], capture_output=True, text=True, check=True)
+    versions = []
+    for line in result.stdout.splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        match = re.match(r"(\d+)\.(\d+)", line)
+        if match:
+            versions.append((int(match.group(1)), int(match.group(2))))
+    return versions
+
+
+def ptxas_supported_sms(ptxas_executable):
+    """Extract supported SM architectures from ptxas help output."""
+    result = subprocess.run([ptxas_executable, "--help"], capture_output=True, text=True, check=True)
+    supported_sms = re.findall(r"'sm_(\d+(?:[af]?))'", result.stdout)
+    if not supported_sms:
+        raise RuntimeError("No SM architecture values found in ptxas help output")
+    return supported_sms
 
 
-# Enable %ptxas, %ptxas-verify, and %ptxas-legacy-verify tools.
 def enable_ptxas(ptxas_executable):
-    version = ptxas_version(ptxas_executable)
-    if version:
-        # ptxas is supposed to be backward compatible with previous
-        # versions, so add a feature for every known version prior to
-        # the current one.
-        ptxas_known_versions = [
-            (9, 0),
-            (9, 1),
-            (9, 2),
-            (10, 0),
-            (10, 1),
-            (10, 2),
-            (11, 0),
-            (11, 1),
-            (11, 2),
-            (11, 3),
-            (11, 4),
-            (11, 5),
-            (11, 6),
-            (11, 7),
-            (11, 8),
-            (12, 0),
-            (12, 1),
-            (12, 2),
-            (12, 3),
-            (12, 4),
-            (12, 5),
-            (12, 6),
-            (12, 8),
-            (12, 9),
-            (13, 0),
-            (13, 1),
-        ]
-
-        def version_int(ver):
-            return ver[0] * 100 + ver[1]
-
-        # ignore ptxas if its version is below the minimum supported
-        # version
-        min_version = ptxas_known_versions[0]
-        if version_int(version) < version_int(min_version):
-            print(
-                "Warning: ptxas version {}.{} is not supported".format(
-                    version[0], version[1]
-                )
-            )
-            return
-
-        for known_version in ptxas_known_versions:
-            if version_int(known_version) <= version_int(version):
-                major, minor = known_version
-                if version_int(version) >= version_int((13, 0)):
-                    config.available_features.add("ptxas-{}.{}".format(major, minor))
-                else:
-                    config.available_features.add("ptxas-legacy-{}.{}".format(major, minor))
-                break
-
-    # By default, compile the PTX for the latest supported architecture.
-    # You can override the default by specifying a different arch
-    # e.g. %ptxas-verify -arch=sm_75.
-    # if version_int(version) >= version_int((13, 0)):
-    #     ptxas_default_arch = 100
-    # elif version_int(version) >= version_int((12, 0)):
-    #     ptxas_default_arch = 90
-    if version_int(version) >= version_int((10, 0)):
-        ptxas_default_arch = 75
-    else:
-        ptxas_default_arch = 60
+    config.available_features.add("ptxas")
+    tools.append(ToolSubst("%ptxas", ptxas_executable))
 
-    if version_int(version) >= version_int((13, 0)):
-        ptxas_verify_tool = "%ptxas-verify"
-        config.available_features.add("ptxas")
-    else:
-        ptxas_verify_tool = "%ptxas-legacy-verify"
-        config.available_features.add("ptxas-legacy")
-
-    tools.extend(
-        [
-            ToolSubst("%ptxas", ptxas_executable),
-            ToolSubst(
-                ptxas_verify_tool,
-                "{} -arch=sm_{} -c -".format(ptxas_executable, ptxas_default_arch),
-            ),
-        ]
-    )
+    major_version, minor_version = ptxas_version(ptxas_executable)
+    config.available_features.add("ptxas-{}.{}".format(major_version, minor_version))
+
+    for major_version, minor_version in ptxas_supported_isa_versions(ptxas_executable):
+        config.available_features.add("ptxas-isa-v{}-{}".format(major_version, minor_version))
+
+    for sm in ptxas_supported_sms(ptxas):
+        config.available_features.add("ptxas-sm-{}".format(sm))
 
 
 ptxas_executable = (

>From 55fdd9a5a0c443afd1c64921e012adc68ea76934 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <justinfargnoli at gmail.com>
Date: Tue, 19 Aug 2025 16:32:05 -0700
Subject: [PATCH 5/7] Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 llvm/test/lit.cfg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 540e7b945e713..3b1b0a693b1fb 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -335,7 +335,7 @@ def enable_ptxas(ptxas_executable):
     for major_version, minor_version in ptxas_supported_isa_versions(ptxas_executable):
         config.available_features.add("ptxas-isa-v{}-{}".format(major_version, minor_version))
 
-    for sm in ptxas_supported_sms(ptxas):
+    for sm in ptxas_supported_sms(ptxas_executable):
         config.available_features.add("ptxas-sm-{}".format(sm))
 
 

>From ba1afdbebafb637715138610991708581c9ee6cd Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 20 Aug 2025 00:22:08 +0000
Subject: [PATCH 6/7] Update tests

---
 llvm/test/CodeGen/NVPTX/access-non-generic.ll |  8 +++---
 llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll   |  2 +-
 llvm/test/CodeGen/NVPTX/atomics-sm60.ll       |  4 +--
 llvm/test/CodeGen/NVPTX/atomics-sm70.ll       |  6 ++---
 llvm/test/CodeGen/NVPTX/atomics-with-scope.ll |  4 +--
 llvm/test/CodeGen/NVPTX/b52037.ll             |  2 +-
 llvm/test/CodeGen/NVPTX/bmsk.ll               |  2 +-
 .../test/CodeGen/NVPTX/byval-arg-vectorize.ll |  2 +-
 llvm/test/CodeGen/NVPTX/byval-const-global.ll |  2 +-
 llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll       |  2 +-
 llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll       |  2 +-
 llvm/test/CodeGen/NVPTX/cmpxchg.ll            |  2 +-
 llvm/test/CodeGen/NVPTX/f16-abs.ll            |  8 +++---
 llvm/test/CodeGen/NVPTX/f16-instructions.ll   |  8 +++---
 llvm/test/CodeGen/NVPTX/f16x2-instructions.ll |  6 ++---
 llvm/test/CodeGen/NVPTX/fence-nocluster.ll    |  6 ++---
 llvm/test/CodeGen/NVPTX/i16x2-instructions.ll |  4 +--
 llvm/test/CodeGen/NVPTX/kernel-param-align.ll |  2 +-
 llvm/test/CodeGen/NVPTX/match.ll              |  2 +-
 .../CodeGen/NVPTX/math-intrins-sm53-ptx42.ll  |  2 +-
 llvm/test/CodeGen/NVPTX/nanosleep.ll          |  2 +-
 llvm/test/CodeGen/NVPTX/pr126337.ll           |  2 +-
 llvm/test/CodeGen/NVPTX/szext.ll              |  2 +-
 llvm/test/CodeGen/NVPTX/trunc-setcc.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/trunc-tofp.ll         |  2 +-
 llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py    |  2 +-
 llvm/test/lit.cfg.py                          | 25 +++++++++++++------
 27 files changed, 62 insertions(+), 51 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index 601a35288f54d..a723ab1cae20c 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix PTX
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
+; RUN: llc < %s -mtriple=nvptx | FileCheck %s --check-prefix PTX
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s --check-prefix PTX
 ; RUN: opt -mtriple=nvptx-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
 ; RUN: opt -mtriple=nvptx64-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
 
 @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
 @scalar = internal addrspace(3) global float 0.000000e+00, align 4
diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
index 94b3f0a2e1c3e..88fae7a3f78a0 100644
--- a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefixes=ALL,SM30
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=ALL,SM60
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: fadd_double
 define void @fadd_double(ptr %0, double %1) {
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
index 2e11323d1b3e1..d90dcaf3400b0 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test(
 define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, double %d) {
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index f710d7f883a1b..82d652fb26588 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -2,9 +2,9 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK64
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | FileCheck %s --check-prefixes=CHECKPTX62
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
index e6636d706b49d..94c416f5791b2 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test_atomics_scope(
 define void @test_atomics_scope(ptr %fp, float %f,
diff --git a/llvm/test/CodeGen/NVPTX/b52037.ll b/llvm/test/CodeGen/NVPTX/b52037.ll
index b6317dfb28597..268a8972ebd22 100644
--- a/llvm/test/CodeGen/NVPTX/b52037.ll
+++ b/llvm/test/CodeGen/NVPTX/b52037.ll
@@ -4,7 +4,7 @@
 ; https://bugs.llvm.org/show_bug.cgi?id=52037 for the gory details.
 ;
 ; RUN: llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | FileCheck %s
-; RUN: %if ptxas %{ llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | %ptxas-verify -arch=sm_70 %}
 
 ; CHECK-LABEL: .visible .entry barney(
 ; CHECK-NOT:  .local{{.*}}__local_depot
diff --git a/llvm/test/CodeGen/NVPTX/bmsk.ll b/llvm/test/CodeGen/NVPTX/bmsk.ll
index d5b278657bd52..530a78cb14102 100644
--- a/llvm/test/CodeGen/NVPTX/bmsk.ll
+++ b/llvm/test/CodeGen/NVPTX/bmsk.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -o - < %s -mcpu=sm_70 -mattr=+ptx76 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
 
 target triple = "nvptx64-unknown-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
index 9988d5b122cc1..e7f71f4ad52ea 100644
--- a/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mcpu=sm_70 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
index b4934e1a94d1b..81e7edfd8602e 100644
--- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mcpu=sm_70 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
index 63c389c36e87e..3e784440dbdb6 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | FileCheck %s --check-prefix=SM60
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
 
 define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-LABEL: monotonic_monotonic_i8_global_cta(
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
index 5cb344d5ded84..8b435c28c558f 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefix=SM70
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
 
 define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-LABEL: monotonic_monotonic_i8_global_cta(
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
index 237e42394ba2f..f8ec071bd0a1f 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
 
 ; TODO: these are system scope, but are compiled to gpu scope..
 ; TODO: these are seq_cst, but are compiled to relaxed..
diff --git a/llvm/test/CodeGen/NVPTX/f16-abs.ll b/llvm/test/CodeGen/NVPTX/f16-abs.ll
index 4025b38c0f0e4..f5354a33a2c7a 100644
--- a/llvm/test/CodeGen/NVPTX/f16-abs.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-abs.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx60                                      \
 ; RUN:          -O0 -disable-post-ra -verify-machineinstrs                     \
 ; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
-; RUN: %if ptxas %{                                                            \
+; RUN: %if ptxas-sm_53 %{                                                            \
 ; RUN:   llc < %s -mcpu=sm_53 -mattr=+ptx60                                    \
 ; RUN:            -O0 -disable-post-ra -verify-machineinstrs                   \
 ; RUN:   | %ptxas-verify -arch=sm_53                                           \
@@ -14,7 +14,7 @@
 ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math                  \
 ; RUN:          -O0 -disable-post-ra -verify-machineinstrs                     \
 ; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
-; RUN: %if ptxas %{                                                            \
+; RUN: %if ptxas-sm_53 %{                                                            \
 ; RUN:   llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math                \
 ; RUN:            -O0 -disable-post-ra -verify-machineinstrs                   \
 ; RUN:   | %ptxas-verify -arch=sm_53                                           \
@@ -24,7 +24,7 @@
 ; RUN: llc < %s -mcpu=sm_52 -mattr=+ptx65                                      \
 ; RUN:          -O0 -disable-post-ra -verify-machineinstrs                     \
 ; RUN: | FileCheck -check-prefix CHECK-NOF16 %s
-; RUN: %if ptxas %{                                                            \
+; RUN: %if ptxas-sm_52 %{                                                            \
 ; RUN:   llc < %s -mcpu=sm_52 -mattr=+ptx65                                    \
 ; RUN:          -O0 -disable-post-ra -verify-machineinstrs                     \
 ; RUN:   | %ptxas-verify -arch=sm_52                                           \
@@ -34,7 +34,7 @@
 ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65                                      \
 ; RUN:          -O0 -disable-post-ra -verify-machineinstrs                     \
 ; RUN: | FileCheck -check-prefix CHECK-F16-ABS %s
-; RUN: %if ptxas %{                                                            \
+; RUN: %if ptxas-sm_53 %{                                                            \
 ; RUN:   llc < %s -mcpu=sm_53 -mattr=+ptx65                                    \
 ; RUN:          -O0 -disable-post-ra -verify-machineinstrs                     \
 ; RUN:   | %ptxas-verify -arch=sm_53                                           \
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index d4aec4f16f1ab..dd6839bdd440a 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -3,7 +3,7 @@
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN:          -mattr=+ptx60                                                 \
 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-F16-NOFTZ %s
-; RUN: %if ptxas %{                                                           \
+; RUN: %if ptxas-sm_53 && ptxas-isa-v6.0 %{                                                           \
 ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN:          -mattr=+ptx60                                                 \
@@ -14,7 +14,7 @@
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN:          -denormal-fp-math-f32=preserve-sign -mattr=+ptx60             \
 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16-FTZ %s
-; RUN: %if ptxas %{                                                           \
+; RUN: %if ptxas-sm_53 && ptxas-isa-v6.0 %{                                                           \
 ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN:          -denormal-fp-math-f32=preserve-sign -mattr=+ptx60             \
@@ -25,7 +25,7 @@
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
 ; RUN:          -verify-machineinstrs -mattr=+ptx60                         \
 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
-; RUN: %if ptxas %{                                                           \
+; RUN: %if ptxas-sm_53 %{                                                           \
 ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math   \
 ; RUN:   | %ptxas-verify -arch=sm_53                                          \
@@ -34,7 +34,7 @@
 ; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
 ; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
-; RUN: %if ptxas %{                                                               \
+; RUN: %if ptxas-sm_52 %{                                                               \
 ; RUN:   llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
 ; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs         \
 ; RUN:   | %ptxas-verify -arch=sm_52                                              \
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 991311f9492b9..368f9fe14d026 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
-; RUN: %if ptxas %{                                                           \
+; RUN: %if ptxas-sm_53 %{                                                           \
 ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN:   | %ptxas-verify -arch=sm_53                                          \
@@ -13,7 +13,7 @@
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
 ; RUN:           -verify-machineinstrs \
 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
-; RUN: %if ptxas %{                                                           \
+; RUN: %if ptxas-sm_53 %{                                                           \
 ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math   \
 ; RUN:           -verify-machineinstrs                                        \
@@ -23,7 +23,7 @@
 ; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \
 ; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
-; RUN: %if ptxas %{                                                               \
+; RUN: %if ptxas-sm_52 %{                                                               \
 ; RUN:   llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \
 ; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs         \
 ; RUN:   | %ptxas-verify -arch=sm_52                                              \
diff --git a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
index 1c6c1744b5375..ffffe96f0e63f 100644
--- a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
+++ b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
+; RUN: %if ptxas-sm_35 && !ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90
-; RUN: %if ptxas-12.8 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
 
 define void @fence_acquire_sys() {
 ; SM30-LABEL: fence_acquire_sys(
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index 74136bbe478c9..c5bb4af954427 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80        \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,I16x2 %s
-; RUN: %if ptxas %{                                                           \
+; RUN: %if ptxas-sm_90 %{                                                           \
 ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90                    \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN:   | %ptxas-verify -arch=sm_90                                          \
@@ -12,7 +12,7 @@
 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53                      \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,NO-I16x2 %s
-; RUN: %if ptxas %{                                                           \
+; RUN: %if ptxas-sm_53 %{                                                           \
 ; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53                    \
 ; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
 ; RUN:   | %ptxas-verify -arch=sm_53                                          \
diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
index a56b85de80143..b66b843f4b838 100644
--- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
+; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas -arch=sm_60 - %}
 
 %struct.Large = type { [16 x double] }
 
diff --git a/llvm/test/CodeGen/NVPTX/match.ll b/llvm/test/CodeGen/NVPTX/match.ll
index ae01b0d3cc7e0..b2200ab6e1f3f 100644
--- a/llvm/test/CodeGen/NVPTX/match.ll
+++ b/llvm/test/CodeGen/NVPTX/match.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
 
 declare i32 @llvm.nvvm.match.any.sync.i32(i32, i32)
 declare i32 @llvm.nvvm.match.any.sync.i64(i32, i64)
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll b/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll
index 236bf67f81821..462334d9bc6e7 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins-sm53-ptx42.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | %ptxas-verify -arch=sm_53 %}
+; RUN: %if ptxas-sm_53 && ptxas-isa-v4.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | %ptxas-verify -arch=sm_53 %}
 
 declare half @llvm.nvvm.fma.rn.f16(half, half, half)
 declare half @llvm.nvvm.fma.rn.ftz.f16(half, half, half)
diff --git a/llvm/test/CodeGen/NVPTX/nanosleep.ll b/llvm/test/CodeGen/NVPTX/nanosleep.ll
index de08c9fbdf417..148ccd72483db 100644
--- a/llvm/test/CodeGen/NVPTX/nanosleep.ll
+++ b/llvm/test/CodeGen/NVPTX/nanosleep.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=nvptx64 -O2 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-isa-v6.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
 
 declare void @llvm.nvvm.nanosleep(i32)
 
diff --git a/llvm/test/CodeGen/NVPTX/pr126337.ll b/llvm/test/CodeGen/NVPTX/pr126337.ll
index 95258f7a3f360..6caef744373f3 100644
--- a/llvm/test/CodeGen/NVPTX/pr126337.ll
+++ b/llvm/test/CodeGen/NVPTX/pr126337.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas -arch=sm_70 -c - %}
+; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas -arch=sm_70 -c - %}
 
 ; This IR should compile without triggering assertions in LICM
 ; when the CopyToReg from %0 in the first BB gets eliminated
diff --git a/llvm/test/CodeGen/NVPTX/szext.ll b/llvm/test/CodeGen/NVPTX/szext.ll
index 5a4fe4ed7fc0b..6a30396215548 100644
--- a/llvm/test/CodeGen/NVPTX/szext.ll
+++ b/llvm/test/CodeGen/NVPTX/szext.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -o - < %s -mcpu=sm_70 -mattr=+ptx76 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-isa-v7.6 %{ llc < %s -mcpu=sm_70 -mattr=+ptx76 | %ptxas-verify -arch=sm_70 %}
 
 target triple = "nvptx64-unknown-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/trunc-setcc.ll b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
index f22e37e203966..f6a1c6bb60d6d 100644
--- a/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
+; RUN: %if ptxas-sm_50 %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/trunc-tofp.ll b/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
index 12502b6f29899..99a1e8a0630a8 100644
--- a/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
+++ b/llvm/test/CodeGen/NVPTX/trunc-tofp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
+; RUN: %if ptxas-sm_50 %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py b/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py
index bc441bfa8180f..06094fcafc1cc 100644
--- a/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py
+++ b/llvm/test/CodeGen/NVPTX/wmma-ptx60-sm70.py
@@ -6,7 +6,7 @@
 # RUN:           --check-prefixes=INTRINSICS,NOEXTGEOM,NOINT,NOSUBINT,NOMMA,NODOUBLE,NOALTFLOAT,NOLDMATRIX
 # RUN: llc < %t-ptx60-sm_70.ll -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 \
 # RUN:           | FileCheck %t-ptx60-sm_70.ll
-# RUN: %if ptxas %{                                                       \
+# RUN: %if ptxas-sm_70 && ptxas-isa-v6.0 %{                                                       \
 # RUN: llc < %t-ptx60-sm_70.ll -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 \
 # RUN:           | %ptxas-verify -arch=sm_70                              \
 # RUN: %}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 3b1b0a693b1fb..a0c48681d72f2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -303,8 +303,12 @@ def ptxas_version(ptxas):
 
 
 def ptxas_supported_isa_versions(ptxas):
-    """Return list of (major, minor) tuples for supported PTX ISA versions."""
-    result = subprocess.run([ptxas, "--list-version"], capture_output=True, text=True, check=True)
+    result = subprocess.run(
+        [ptxas, "--list-version"],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
     versions = []
     for line in result.stdout.splitlines():
         line = line.strip()
@@ -317,8 +321,12 @@ def ptxas_supported_isa_versions(ptxas):
 
 
 def ptxas_supported_sms(ptxas_executable):
-    """Extract supported SM architectures from ptxas help output."""
-    result = subprocess.run([ptxas_executable, "--help"], capture_output=True, text=True, check=True)
+    result = subprocess.run(
+        [ptxas_executable, "--help"],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
     supported_sms = re.findall(r"'sm_(\d+(?:[af]?))'", result.stdout)
     if not supported_sms:
         raise RuntimeError("No SM architecture values found in ptxas help output")
@@ -327,16 +335,19 @@ def ptxas_supported_sms(ptxas_executable):
 
 def enable_ptxas(ptxas_executable):
     config.available_features.add("ptxas")
-    tools.append(ToolSubst("%ptxas", ptxas_executable))
+    tools.extend([
+        ToolSubst("%ptxas", ptxas_executable),
+        ToolSubst("%ptxas-verify", f"{ptxas_executable} -c -"),
+    ])
 
     major_version, minor_version = ptxas_version(ptxas_executable)
     config.available_features.add("ptxas-{}.{}".format(major_version, minor_version))
 
     for major_version, minor_version in ptxas_supported_isa_versions(ptxas_executable):
-        config.available_features.add("ptxas-isa-v{}-{}".format(major_version, minor_version))
+        config.available_features.add("ptxas-isa-v{}.{}".format(major_version, minor_version))
 
     for sm in ptxas_supported_sms(ptxas_executable):
-        config.available_features.add("ptxas-sm-{}".format(sm))
+        config.available_features.add("ptxas-sm_{}".format(sm))
 
 
 ptxas_executable = (

>From cde24e36b62f54791746f0d67eec514fac473386 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 20 Aug 2025 21:47:17 +0000
Subject: [PATCH 7/7] Explicitly check for 32 bit address size support

---
 llvm/test/CodeGen/NVPTX/access-non-generic.ll |  2 +-
 llvm/test/CodeGen/NVPTX/addrspacecast.ll      |  2 +-
 llvm/test/CodeGen/NVPTX/annotations.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll |  2 +-
 llvm/test/CodeGen/NVPTX/arithmetic-int.ll     |  2 +-
 llvm/test/CodeGen/NVPTX/atomics-sm60.ll       |  2 +-
 llvm/test/CodeGen/NVPTX/atomics-sm70.ll       |  4 ++--
 llvm/test/CodeGen/NVPTX/atomics-sm90.ll       |  4 ++--
 llvm/test/CodeGen/NVPTX/atomics-with-scope.ll |  2 +-
 llvm/test/CodeGen/NVPTX/calling-conv.ll       |  2 +-
 llvm/test/CodeGen/NVPTX/combine-mad.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/compare-int.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/convert-fp.ll         |  2 +-
 llvm/test/CodeGen/NVPTX/convert-int-sm20.ll   |  2 +-
 llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll |  2 +-
 llvm/test/CodeGen/NVPTX/fence-nocluster.ll    |  2 +-
 llvm/test/CodeGen/NVPTX/fma-disable.ll        |  4 ++--
 llvm/test/CodeGen/NVPTX/global-addrspace.ll   |  2 +-
 llvm/test/CodeGen/NVPTX/global-ordering.ll    |  2 +-
 llvm/test/CodeGen/NVPTX/idioms.ll             |  2 +-
 llvm/test/CodeGen/NVPTX/intrinsic-old.ll      |  2 +-
 llvm/test/CodeGen/NVPTX/intrinsics.ll         |  2 +-
 llvm/test/CodeGen/NVPTX/ld-addrspace.ll       |  2 +-
 llvm/test/CodeGen/NVPTX/ld-generic.ll         |  2 +-
 llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py   |  2 +-
 llvm/test/CodeGen/NVPTX/local-stack-frame.ll  |  2 +-
 llvm/test/CodeGen/NVPTX/mbarrier.ll           |  2 +-
 llvm/test/CodeGen/NVPTX/nofunc.ll             |  2 +-
 llvm/test/CodeGen/NVPTX/packed-aggr.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/param-overalign.ll    |  2 +-
 llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll   |  2 +-
 llvm/test/CodeGen/NVPTX/reg-types.ll          |  2 +-
 llvm/test/CodeGen/NVPTX/short-ptr.ll          |  2 +-
 llvm/test/CodeGen/NVPTX/simple-call.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/st-addrspace.ll       |  2 +-
 llvm/test/CodeGen/NVPTX/st-generic.ll         |  2 +-
 llvm/test/CodeGen/NVPTX/st-param-imm.ll       |  2 +-
 llvm/test/CodeGen/NVPTX/symbol-naming.ll      |  2 +-
 llvm/test/CodeGen/NVPTX/unreachable.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/vaargs.ll             |  2 +-
 llvm/test/CodeGen/NVPTX/vector-compare.ll     |  2 +-
 llvm/test/CodeGen/NVPTX/vector-select.ll      |  2 +-
 llvm/test/lit.cfg.py                          | 17 +++++++++++++++++
 43 files changed, 62 insertions(+), 45 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index a723ab1cae20c..ff9a07b8d760e 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s --check-prefix PTX
 ; RUN: opt -mtriple=nvptx-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
 ; RUN: opt -mtriple=nvptx64-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
 
 @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast.ll b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
index 86008a1b70058..546d22ca0e691 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32
 ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64
 ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
 
diff --git a/llvm/test/CodeGen/NVPTX/annotations.ll b/llvm/test/CodeGen/NVPTX/annotations.ll
index 5360e8988777b..e4aa0552e8420 100644
--- a/llvm/test/CodeGen/NVPTX/annotations.ll
+++ b/llvm/test/CodeGen/NVPTX/annotations.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 @texture = internal addrspace(1) global i64 0, align 8
diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
index ce71d3a78c0de..e88d0396f0858 100644
--- a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
+++ b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
 
 ;; These tests should run for all targets
diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-int.ll b/llvm/test/CodeGen/NVPTX/arithmetic-int.ll
index 1fbfd0a987d7a..9e41e9e240902 100644
--- a/llvm/test/CodeGen/NVPTX/arithmetic-int.ll
+++ b/llvm/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ;; These tests should run for all targets
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
index d90dcaf3400b0..5a7a1823cb2a0 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 ; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test(
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index 82d652fb26588..abb0539f69c15 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -2,9 +2,9 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=CHECK64
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | FileCheck %s --check-prefixes=CHECKPTX62
-; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
 ; RUN: %if ptxas-sm_70 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-; RUN: %if ptxas-sm_70 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_70 -mattr=+ptx62 | %ptxas-verify -arch=sm_70 %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index f96fd30019025..62f60dc2a2631 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -2,9 +2,9 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s --check-prefixes=CHECK64
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_86 -mattr=+ptx71 | FileCheck %s --check-prefixes=CHECKPTX71
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_86 -mattr=+ptx71 | %ptxas-verify -arch=sm_86 %}
+; RUN: %if ptxas-sm_86 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_86 -mattr=+ptx71 | %ptxas-verify -arch=sm_86 %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
index 94c416f5791b2..9e30519b31cc3 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas-sm_60 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 ; RUN: %if ptxas-sm_60 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test_atomics_scope(
diff --git a/llvm/test/CodeGen/NVPTX/calling-conv.ll b/llvm/test/CodeGen/NVPTX/calling-conv.ll
index 74b99efcdadf7..be0a1e714bd50 100644
--- a/llvm/test/CodeGen/NVPTX/calling-conv.ll
+++ b/llvm/test/CodeGen/NVPTX/calling-conv.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 
diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll
index da303b7c38eb7..d39ab9e89d6a4 100644
--- a/llvm/test/CodeGen/NVPTX/combine-mad.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | %ptxas-verify %}
 
 define i32 @test1(i32 %n, i32 %m) {
diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll
index 9338172d024ce..ccc8cff7be4b3 100644
--- a/llvm/test/CodeGen/NVPTX/compare-int.ll
+++ b/llvm/test/CodeGen/NVPTX/compare-int.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ;; These tests should run for all targets
diff --git a/llvm/test/CodeGen/NVPTX/convert-fp.ll b/llvm/test/CodeGen/NVPTX/convert-fp.ll
index debaadedce09a..4b33cd66ce8f0 100644
--- a/llvm/test/CodeGen/NVPTX/convert-fp.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-fp.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 define i16 @cvt_u16_f32(float %x) {
diff --git a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
index a2fc8da3f1e61..2f3da349a3e05 100644
--- a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ;; Integer conversions happen inplicitly by loading/storing the proper types
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index 06fb8d2c7c54d..ac4dad50e1d21 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -4,7 +4,7 @@
 
 ; RUN: llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-32
 ; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
 
 ; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52.
diff --git a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
index ffffe96f0e63f..3c7b0fbece97a 100644
--- a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
+++ b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30
-; RUN: %if ptxas-sm_35 && !ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
+; RUN: %if ptxas-sm_35 %{ llc < %s -march=nvptx64 -mcpu=sm_35 -mattr=+ptx50 | %ptxas-verify -arch=sm_35 %}
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70
 ; RUN: %if ptxas-sm_70 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90
diff --git a/llvm/test/CodeGen/NVPTX/fma-disable.ll b/llvm/test/CodeGen/NVPTX/fma-disable.ll
index 0038b4b65e0f9..344fae6673f81 100644
--- a/llvm/test/CodeGen/NVPTX/fma-disable.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-disable.ll
@@ -2,8 +2,8 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
 
diff --git a/llvm/test/CodeGen/NVPTX/global-addrspace.ll b/llvm/test/CodeGen/NVPTX/global-addrspace.ll
index 3f9d321ab4406..7cfde62bd3f18 100644
--- a/llvm/test/CodeGen/NVPTX/global-addrspace.ll
+++ b/llvm/test/CodeGen/NVPTX/global-addrspace.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; PTX32: .visible .global .align 4 .u32 i;
diff --git a/llvm/test/CodeGen/NVPTX/global-ordering.ll b/llvm/test/CodeGen/NVPTX/global-ordering.ll
index 2815cff7d7b41..145d21c712eb6 100644
--- a/llvm/test/CodeGen/NVPTX/global-ordering.ll
+++ b/llvm/test/CodeGen/NVPTX/global-ordering.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; Make sure we emit these globals in def-use order
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index a3bf8922a98f4..a3df0995fb635 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -3,7 +3,7 @@
 
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 %struct.S16 = type { i16, i16 }
diff --git a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
index f595df837f91f..1449e7fbd4186 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 define ptx_device i32 @test_tid_x() {
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 4ed50632251cb..872c921eb7351 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
 
 define float @test_fabsf(float %f) {
diff --git a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
index 24071b48143f2..590a7099ec7ee 100644
--- a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
 
diff --git a/llvm/test/CodeGen/NVPTX/ld-generic.ll b/llvm/test/CodeGen/NVPTX/ld-generic.ll
index ee304ca1601f4..de1ee13c91686 100644
--- a/llvm/test/CodeGen/NVPTX/ld-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/ld-generic.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 
diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
index 2fa4c89f4d71c..b8d53ae0e5ee5 100644
--- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
+++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
@@ -4,7 +4,7 @@
 # RUN: %python %s > %t.ll
 # RUN: llc < %t.ll -mtriple=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll
 # RUN: llc < %t.ll -mtriple=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
-# RUN: %if ptxas && !ptxas-12.0 %{ llc < %t.ll -mtriple=nvptx -mcpu=sm_30 | %ptxas-verify %}
+# RUN: %if ptxas-32 %{ llc < %t.ll -mtriple=nvptx -mcpu=sm_30 | %ptxas-verify %}
 # RUN: %if ptxas %{ llc < %t.ll -mtriple=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
 
 from __future__ import print_function
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index ae069cf956c36..dbb93a8a74a8c 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
 ; Ensure we access the local stack properly
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier.ll b/llvm/test/CodeGen/NVPTX/mbarrier.ll
index 87a73aa4d4e2c..19ed927488298 100644
--- a/llvm/test/CodeGen/NVPTX/mbarrier.ll
+++ b/llvm/test/CodeGen/NVPTX/mbarrier.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s -check-prefix=CHECK_PTX64
-; RUN: %if ptxas-11.0 && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
+; RUN: %if ptxas-11.0 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
 ; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
 
 declare void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
diff --git a/llvm/test/CodeGen/NVPTX/nofunc.ll b/llvm/test/CodeGen/NVPTX/nofunc.ll
index a8ce20ed91dc4..0708df9445dc7 100644
--- a/llvm/test/CodeGen/NVPTX/nofunc.ll
+++ b/llvm/test/CodeGen/NVPTX/nofunc.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; Test that we don't crash if we're compiling a module with function references,
diff --git a/llvm/test/CodeGen/NVPTX/packed-aggr.ll b/llvm/test/CodeGen/NVPTX/packed-aggr.ll
index 602bef299bb21..230094654580b 100644
--- a/llvm/test/CodeGen/NVPTX/packed-aggr.ll
+++ b/llvm/test/CodeGen/NVPTX/packed-aggr.ll
@@ -5,7 +5,7 @@
 ; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | \
 ; RUN:   FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: %if ptxas-11.1 && !ptxas-12.0%{ llc < %s -mtriple=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
+; RUN: %if ptxas-11.1 && ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
 ; RUN: %if ptxas-11.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
 
 ;; Test that packed structs with symbol references are represented using the
diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll
index 2155fb4031c36..04d0ad00ef9db 100644
--- a/llvm/test/CodeGen/NVPTX/param-overalign.ll
+++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
 
 target triple = "nvptx64-nvidia-cuda"
 
diff --git a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
index cd2505c20d39c..76b262930095c 100644
--- a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 define ptx_kernel void @t1(ptr %a) {
diff --git a/llvm/test/CodeGen/NVPTX/reg-types.ll b/llvm/test/CodeGen/NVPTX/reg-types.ll
index ea45bfdc5e190..1f2fdb642fdb3 100644
--- a/llvm/test/CodeGen/NVPTX/reg-types.ll
+++ b/llvm/test/CodeGen/NVPTX/reg-types.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
 ; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=NO8BIT
 ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=NO8BIT
-; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; CHECK-LABEL: .visible .func func(
diff --git a/llvm/test/CodeGen/NVPTX/short-ptr.ll b/llvm/test/CodeGen/NVPTX/short-ptr.ll
index eb058955e0aa1..8510d44b0613d 100644
--- a/llvm/test/CodeGen/NVPTX/short-ptr.ll
+++ b/llvm/test/CodeGen/NVPTX/short-ptr.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix CHECK-DEFAULT-32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | FileCheck %s --check-prefixes CHECK-SHORT-SHARED,CHECK-SHORT-CONST,CHECK-SHORT-LOCAL
 
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | %ptxas-verify %}
 
diff --git a/llvm/test/CodeGen/NVPTX/simple-call.ll b/llvm/test/CodeGen/NVPTX/simple-call.ll
index 991ae04b91b67..3d39f1246d2f1 100644
--- a/llvm/test/CodeGen/NVPTX/simple-call.ll
+++ b/llvm/test/CodeGen/NVPTX/simple-call.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
 ; CHECK: .func ({{.*}}) device_func
diff --git a/llvm/test/CodeGen/NVPTX/st-addrspace.ll b/llvm/test/CodeGen/NVPTX/st-addrspace.ll
index 1e0e75a041c14..a7fc48543bdc5 100644
--- a/llvm/test/CodeGen/NVPTX/st-addrspace.ll
+++ b/llvm/test/CodeGen/NVPTX/st-addrspace.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
 
diff --git a/llvm/test/CodeGen/NVPTX/st-generic.ll b/llvm/test/CodeGen/NVPTX/st-generic.ll
index 950da93f95217..31b26acd84d16 100644
--- a/llvm/test/CodeGen/NVPTX/st-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/st-generic.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ;; i8
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index f90435abefbb5..41b342bc88789 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -verify-machineinstrs | %ptxas-verify %}
 
 target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/symbol-naming.ll b/llvm/test/CodeGen/NVPTX/symbol-naming.ll
index 941378f120c32..8943cd7501075 100644
--- a/llvm/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/llvm/test/CodeGen/NVPTX/symbol-naming.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
 
 ; Verify that the NVPTX target removes invalid symbol names prior to emitting
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 618c7ed0c4997..f59a4686604e8 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -13,7 +13,7 @@
 ; RUN:     | FileCheck %s --check-prefixes=CHECK,TRAP
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
 ; RUN:     | FileCheck %s --check-prefixes=BUG-FIXED
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
 target triple = "nvptx-unknown-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index 9e312a2fec60a..c25ff362c3369 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK32
 ; RUN: llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK64
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
 
 ; CHECK: .address_size [[BITS:32|64]]
diff --git a/llvm/test/CodeGen/NVPTX/vector-compare.ll b/llvm/test/CodeGen/NVPTX/vector-compare.ll
index 0e63ee96932d9..3b634bbd01ef7 100644
--- a/llvm/test/CodeGen/NVPTX/vector-compare.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-compare.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify -m32 %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify -m32 %}
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; This test makes sure that the result of vector compares are properly
diff --git a/llvm/test/CodeGen/NVPTX/vector-select.ll b/llvm/test/CodeGen/NVPTX/vector-select.ll
index 569da5e6628b0..70f77a921851a 100644
--- a/llvm/test/CodeGen/NVPTX/vector-select.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-select.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas-32 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; This test makes sure that vector selects are scalarized by the type legalizer.
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index a0c48681d72f2..7e08a33ecbd3c 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -333,6 +333,20 @@ def ptxas_supported_sms(ptxas_executable):
     return supported_sms
 
 
+def ptxas_supports_address_size_32(ptxas_executable):
+    result = subprocess.run(
+        [ptxas_executable, "-m 32"],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    if "is not defined for option 'machine'" in result.stderr:
+        return False
+    if "Missing .version directive at start of file" in result.stderr:
+        return True
+    raise RuntimeError("Unexpected ptxas output: {}".format(result.stderr))
+
+
 def enable_ptxas(ptxas_executable):
     config.available_features.add("ptxas")
     tools.extend([
@@ -349,6 +363,9 @@ def enable_ptxas(ptxas_executable):
     for sm in ptxas_supported_sms(ptxas_executable):
         config.available_features.add("ptxas-sm_{}".format(sm))
 
+    if ptxas_supports_address_size_32(ptxas_executable):
+        config.available_features.add("ptxas-32")
+
 
 ptxas_executable = (
     os.environ.get("LLVM_PTXAS_EXECUTABLE", None) or config.ptxas_executable