[llvm] [NVPTX] Aggressively try to replace image handles with references (PR #119730)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 12 14:32:01 PST 2024


https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/119730

>From 821bb59c4b401f8a1f6d2284a719b54c52bcc36a Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 12 Dec 2024 17:48:30 +0000
Subject: [PATCH 1/3] pre-commit update tests

---
 llvm/test/CodeGen/NVPTX/surf-read-cuda.ll  |  81 +++++++--
 llvm/test/CodeGen/NVPTX/surf-write-cuda.ll |  58 ++++--
 llvm/test/CodeGen/NVPTX/tex-read-cuda.ll   | 141 ++++++++++++---
 llvm/test/CodeGen/NVPTX/texsurf-queries.ll | 201 +++++++++++++++++----
 4 files changed, 384 insertions(+), 97 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
index a4ab7892469163..811b396d9d0a29 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -9,38 +10,80 @@ declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
 declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
-; SM20-LABEL: .entry foo
-; SM30-LABEL: .entry foo
 define void @foo(i64 %img, ptr %red, i32 %idx) {
-; SM20: ld.param.u64    %rd[[SURFREG:[0-9]+]], [foo_param_0];
-; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
-; SM30: ld.param.u64    %rd[[SURFREG:[0-9]+]], [foo_param_0];
-; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
+; SM20-LABEL: foo(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<3>;
+; SM20-NEXT:    .reg .f32 %f<2>;
+; SM20-NEXT:    .reg .b64 %rd<4>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; SM20-NEXT:    ld.param.u64 %rd2, [foo_param_1];
+; SM20-NEXT:    cvta.to.global.u64 %rd3, %rd2;
+; SM20-NEXT:    ld.param.u32 %r1, [foo_param_2];
+; SM20-NEXT:    suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
+; SM20-NEXT:    cvt.rn.f32.s32 %f1, %r2;
+; SM20-NEXT:    st.global.f32 [%rd3], %f1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: foo(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<3>;
+; SM30-NEXT:    .reg .f32 %f<2>;
+; SM30-NEXT:    .reg .b64 %rd<4>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; SM30-NEXT:    ld.param.u64 %rd2, [foo_param_1];
+; SM30-NEXT:    cvta.to.global.u64 %rd3, %rd2;
+; SM30-NEXT:    ld.param.u32 %r1, [foo_param_2];
+; SM30-NEXT:    suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
+; SM30-NEXT:    cvt.rn.f32.s32 %f1, %r2;
+; SM30-NEXT:    st.global.f32 [%rd3], %f1;
+; SM30-NEXT:    ret;
   %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
-; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
-; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
   %ret = sitofp i32 %val to float
-; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
-; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
   store float %ret, ptr %red
   ret void
 }
 
 @surf0 = internal addrspace(1) global i64 0, align 8
 
-; SM20-LABEL: .entry bar
-; SM30-LABEL: .entry bar
 define void @bar(ptr %red, i32 %idx) {
-; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
+; SM20-LABEL: bar(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<3>;
+; SM20-NEXT:    .reg .f32 %f<2>;
+; SM20-NEXT:    .reg .b64 %rd<4>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [bar_param_0];
+; SM20-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; SM20-NEXT:    ld.param.u32 %r1, [bar_param_1];
+; SM20-NEXT:    suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
+; SM20-NEXT:    cvt.rn.f32.s32 %f1, %r2;
+; SM20-NEXT:    st.global.f32 [%rd2], %f1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: bar(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<3>;
+; SM30-NEXT:    .reg .f32 %f<2>;
+; SM30-NEXT:    .reg .b64 %rd<4>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [bar_param_0];
+; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; SM30-NEXT:    ld.param.u32 %r1, [bar_param_1];
+; SM30-NEXT:    mov.u64 %rd3, surf0;
+; SM30-NEXT:    suld.b.1d.b32.trap {%r2}, [%rd3, {%r1}];
+; SM30-NEXT:    cvt.rn.f32.s32 %f1, %r2;
+; SM30-NEXT:    st.global.f32 [%rd2], %f1;
+; SM30-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
-; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
-; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
   %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
-; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
-; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
   %ret = sitofp i32 %val to float
-; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
-; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
   store float %ret, ptr %red
   ret void
 }
diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
index 9d840ce24e7af8..b79632bf2e5330 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -9,13 +10,30 @@ declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
 declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
-; SM20-LABEL: .entry foo
-; SM30-LABEL: .entry foo
 define void @foo(i64 %img, i32 %val, i32 %idx) {
-; SM20: ld.param.u64    %rd[[SURFREG:[0-9]+]], [foo_param_0];
-; SM20: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
-; SM30: ld.param.u64    %rd[[SURFREG:[0-9]+]], [foo_param_0];
-; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
+; SM20-LABEL: foo(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<3>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; SM20-NEXT:    ld.param.u32 %r1, [foo_param_1];
+; SM20-NEXT:    ld.param.u32 %r2, [foo_param_2];
+; SM20-NEXT:    sust.b.1d.b32.trap [%rd1, {%r2}], {%r1};
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: foo(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<3>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; SM30-NEXT:    ld.param.u32 %r1, [foo_param_1];
+; SM30-NEXT:    ld.param.u32 %r2, [foo_param_2];
+; SM30-NEXT:    sust.b.1d.b32.trap [%rd1, {%r2}], {%r1};
+; SM30-NEXT:    ret;
   tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
   ret void
 }
@@ -24,14 +42,30 @@ define void @foo(i64 %img, i32 %val, i32 %idx) {
 @surf0 = internal addrspace(1) global i64 0, align 8
 
 
-
-; SM20-LABEL: .entry bar
-; SM30-LABEL: .entry bar
 define void @bar(i32 %val, i32 %idx) {
-; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
+; SM20-LABEL: bar(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<3>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u32 %r1, [bar_param_0];
+; SM20-NEXT:    ld.param.u32 %r2, [bar_param_1];
+; SM20-NEXT:    sust.b.1d.b32.trap [surf0, {%r2}], {%r1};
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: bar(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<3>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u32 %r1, [bar_param_0];
+; SM30-NEXT:    ld.param.u32 %r2, [bar_param_1];
+; SM30-NEXT:    mov.u64 %rd1, surf0;
+; SM30-NEXT:    sust.b.1d.b32.trap [%rd1, {%r2}], {%r1};
+; SM30-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
-; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
-; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
   tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
   ret void
 }
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index 473bc28ed4ee7c..7ba31e306c37df 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -9,17 +10,38 @@ target triple = "nvptx-unknown-cuda"
 declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
 declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
-; SM20-LABEL: .entry foo
-; SM30-LABEL: .entry foo
 define void @foo(i64 %img, ptr %red, i32 %idx) {
-; SM20: ld.param.u64    %rd[[TEXREG:[0-9]+]], [foo_param_0];
-; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
-; SM30: ld.param.u64    %rd[[TEXREG:[0-9]+]], [foo_param_0];
-; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
+; SM20-LABEL: foo(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .f32 %f<5>;
+; SM20-NEXT:    .reg .b64 %rd<4>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; SM20-NEXT:    ld.param.u64 %rd2, [foo_param_1];
+; SM20-NEXT:    cvta.to.global.u64 %rd3, %rd2;
+; SM20-NEXT:    ld.param.u32 %r1, [foo_param_2];
+; SM20-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
+; SM20-NEXT:    st.global.f32 [%rd3], %f1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: foo(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .f32 %f<5>;
+; SM30-NEXT:    .reg .b64 %rd<4>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; SM30-NEXT:    ld.param.u64 %rd2, [foo_param_1];
+; SM30-NEXT:    cvta.to.global.u64 %rd3, %rd2;
+; SM30-NEXT:    ld.param.u32 %r1, [foo_param_2];
+; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
+; SM30-NEXT:    st.global.f32 [%rd3], %f1;
+; SM30-NEXT:    ret;
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
-; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
-; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
   store float %ret, ptr %red
   ret void
 }
@@ -27,44 +49,103 @@ define void @foo(i64 %img, ptr %red, i32 %idx) {
 
 @tex0 = internal addrspace(1) global i64 0, align 8
 
-; SM20-LABEL: .entry bar
-; SM30-LABEL: .entry bar
 define void @bar(ptr %red, i32 %idx) {
-; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 
+; SM20-LABEL: bar(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .f32 %f<5>;
+; SM20-NEXT:    .reg .b64 %rd<4>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [bar_param_0];
+; SM20-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; SM20-NEXT:    ld.param.u32 %r1, [bar_param_1];
+; SM20-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
+; SM20-NEXT:    st.global.f32 [%rd2], %f1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: bar(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .f32 %f<5>;
+; SM30-NEXT:    .reg .b64 %rd<4>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [bar_param_0];
+; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; SM30-NEXT:    ld.param.u32 %r1, [bar_param_1];
+; SM30-NEXT:    mov.u64 %rd3, tex0;
+; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
+; SM30-NEXT:    st.global.f32 [%rd2], %f1;
+; SM30-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
-; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
-; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
-; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
-; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
   store float %ret, ptr %red
   ret void
 }
 
 declare float @texfunc(i64)
 
-; SM20-LABEL: .entry baz
-; SM30-LABEL: .entry baz
 define void @baz(ptr %red, i32 %idx) {
-; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
+; SM20-LABEL: baz(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .f32 %f<8>;
+; SM20-NEXT:    .reg .b64 %rd<4>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [baz_param_0];
+; SM20-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; SM20-NEXT:    ld.param.u32 %r1, [baz_param_1];
+; SM20-NEXT:    mov.u64 %rd3, tex0;
+; SM20-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
+; SM20-NEXT:    { // callseq 0, 0
+; SM20-NEXT:    .param .b64 param0;
+; SM20-NEXT:    st.param.b64 [param0], %rd3;
+; SM20-NEXT:    .param .b32 retval0;
+; SM20-NEXT:    call.uni (retval0),
+; SM20-NEXT:    texfunc,
+; SM20-NEXT:    (
+; SM20-NEXT:    param0
+; SM20-NEXT:    );
+; SM20-NEXT:    ld.param.f32 %f5, [retval0];
+; SM20-NEXT:    } // callseq 0
+; SM20-NEXT:    add.rn.f32 %f7, %f1, %f5;
+; SM20-NEXT:    st.global.f32 [%rd2], %f7;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: baz(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .f32 %f<8>;
+; SM30-NEXT:    .reg .b64 %rd<4>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [baz_param_0];
+; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; SM30-NEXT:    ld.param.u32 %r1, [baz_param_1];
+; SM30-NEXT:    mov.u64 %rd3, tex0;
+; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
+; SM30-NEXT:    { // callseq 0, 0
+; SM30-NEXT:    .param .b64 param0;
+; SM30-NEXT:    st.param.b64 [param0], %rd3;
+; SM30-NEXT:    .param .b32 retval0;
+; SM30-NEXT:    call.uni (retval0),
+; SM30-NEXT:    texfunc,
+; SM30-NEXT:    (
+; SM30-NEXT:    param0
+; SM30-NEXT:    );
+; SM30-NEXT:    ld.param.f32 %f5, [retval0];
+; SM30-NEXT:    } // callseq 0
+; SM30-NEXT:    add.rn.f32 %f7, %f1, %f5;
+; SM30-NEXT:    st.global.f32 [%rd2], %f7;
+; SM30-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
-; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
-; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
-; SM20: call.uni ([[RETVAL:.*]]),
-; SM30: call.uni ([[RETVAL:.*]]),
-; SM20: texfunc,
-; SM30: texfunc,
   %texcall = tail call float @texfunc(i64 %texHandle)
-; SM20: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
-; SM30: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
-; SM20: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
-; SM30: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
   %ret2 = fadd float %ret, %texcall
-; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]]
-; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]]
   store float %ret2, ptr %red
   ret void
 }
diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
index 7c3043855c1cc8..7fa2b728e7383a 100644
--- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
+++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -15,85 +16,213 @@ declare i32 @llvm.nvvm.suq.height(i64)
 declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
-; SM20-LABEL: @t0
-; SM30-LABEL: @t0
 define i32 @t0(i64 %texHandle) {
-; SM20: txq.width.b32
-; SM30: txq.width.b32
+; SM20-LABEL: t0(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [t0_param_0];
+; SM20-NEXT:    txq.width.b32 %r1, [%rd1];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: t0(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [t0_param_0];
+; SM30-NEXT:    txq.width.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle)
   ret i32 %width
 }
 
-; SM20-LABEL: @t1
-; SM30-LABEL: @t1
 define i32 @t1() {
-; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0
+; SM20-LABEL: t1(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    txq.width.b32 %r1, [tex0];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: t1(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    mov.u64 %rd1, tex0;
+; SM30-NEXT:    txq.width.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
-; SM20: txq.width.b32 %r{{[0-9]+}}, [tex0]
-; SM30: txq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle)
   ret i32 %width
 }
 
 
-; SM20-LABEL: @t2
-; SM30-LABEL: @t2
 define i32 @t2(i64 %texHandle) {
-; SM20: txq.height.b32
-; SM30: txq.height.b32
+; SM20-LABEL: t2(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [t2_param_0];
+; SM20-NEXT:    txq.height.b32 %r1, [%rd1];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: t2(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [t2_param_0];
+; SM30-NEXT:    txq.height.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle)
   ret i32 %height
 }
 
-; SM20-LABEL: @t3
-; SM30-LABEL: @t3
 define i32 @t3() {
-; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0
+; SM20-LABEL: t3(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    txq.height.b32 %r1, [tex0];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: t3(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    mov.u64 %rd1, tex0;
+; SM30-NEXT:    txq.height.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
-; SM20: txq.height.b32 %r{{[0-9]+}}, [tex0]
-; SM30: txq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle)
   ret i32 %height
 }
 
 
-; SM20-LABEL: @s0
-; SM30-LABEL: @s0
 define i32 @s0(i64 %surfHandle) {
-; SM20: suq.width.b32
-; SM30: suq.width.b32
+; SM20-LABEL: s0(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [s0_param_0];
+; SM20-NEXT:    suq.width.b32 %r1, [%rd1];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: s0(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [s0_param_0];
+; SM30-NEXT:    suq.width.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle)
   ret i32 %width
 }
 
-; SM20-LABEL: @s1
-; SM30-LABEL: @s1
 define i32 @s1() {
-; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0
+; SM20-LABEL: s1(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    suq.width.b32 %r1, [surf0];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: s1(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    mov.u64 %rd1, surf0;
+; SM30-NEXT:    suq.width.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
-; SM20: suq.width.b32 %r{{[0-9]+}}, [surf0]
-; SM30: suq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle)
   ret i32 %width
 }
 
 
-; SM20-LABEL: @s2
-; SM30-LABEL: @s2
 define i32 @s2(i64 %surfHandle) {
-; SM20: suq.height.b32
-; SM30: suq.height.b32
+; SM20-LABEL: s2(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [s2_param_0];
+; SM20-NEXT:    suq.height.b32 %r1, [%rd1];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: s2(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    ld.param.u64 %rd1, [s2_param_0];
+; SM30-NEXT:    suq.height.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle)
   ret i32 %height
 }
 
-; SM20-LABEL: @s3
-; SM30-LABEL: @s3
 define i32 @s3() {
-; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0
+; SM20-LABEL: s3(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<2>;
+; SM20-NEXT:    .reg .b64 %rd<2>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    suq.height.b32 %r1, [surf0];
+; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM20-NEXT:    ret;
+;
+; SM30-LABEL: s3(
+; SM30:       {
+; SM30-NEXT:    .reg .b32 %r<2>;
+; SM30-NEXT:    .reg .b64 %rd<2>;
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    mov.u64 %rd1, surf0;
+; SM30-NEXT:    suq.height.b32 %r1, [%rd1];
+; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
+; SM30-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
-; SM20: suq.height.b32 %r{{[0-9]+}}, [surf0]
-; SM30: suq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle)
   ret i32 %height
 }

>From dfad91ec809beebed116550886b785de1b8080ed Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Thu, 28 Mar 2024 00:37:42 +0000
Subject: [PATCH 2/3] [NVPTX] Always try to replace image handles with
 references

---
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp     | 27 ++++++++++---------
 .../Target/NVPTX/NVPTXMachineFunctionInfo.h   | 22 ++++++++++-----
 .../Target/NVPTX/NVPTXReplaceImageHandles.cpp |  4 +--
 llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp      | 12 +--------
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h        |  2 --
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp  |  6 +----
 llvm/test/CodeGen/NVPTX/surf-read-cuda.ll     |  3 +--
 llvm/test/CodeGen/NVPTX/surf-tex.py           | 24 +++++------------
 llvm/test/CodeGen/NVPTX/surf-write-cuda.ll    |  3 +--
 llvm/test/CodeGen/NVPTX/tex-read-cuda.ll      |  5 ++--
 llvm/test/CodeGen/NVPTX/texsurf-queries.ll    | 12 +++------
 11 files changed, 48 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 7cac4d787778f2..cb756246b8d116 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -209,7 +209,7 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
   TargetMachine &TM = const_cast<TargetMachine &>(MF->getTarget());
   NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine &>(TM);
   const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
-  const char *Sym = MFI->getImageHandleSymbol(Index);
+  StringRef Sym = MFI->getImageHandleSymbol(Index);
   StringRef SymName = nvTM.getStrPool().save(Sym);
   MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
 }
@@ -224,16 +224,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
     return;
   }
 
-  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
 
     MCOperand MCOp;
-    if (!STI.hasImageHandles()) {
-      if (lowerImageHandleOperand(MI, i, MCOp)) {
-        OutMI.addOperand(MCOp);
-        continue;
-      }
+    if (lowerImageHandleOperand(MI, i, MCOp)) {
+      OutMI.addOperand(MCOp);
+      continue;
     }
 
     if (lowerOperand(MO, MCOp))
@@ -1509,13 +1506,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
   const AttributeList &PAL = F->getAttributes();
   const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
   const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
+  const NVPTXMachineFunctionInfo *MFI =
+      MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
 
   Function::const_arg_iterator I, E;
   unsigned paramIndex = 0;
   bool first = true;
   bool isKernelFunc = isKernelFunction(*F);
   bool isABI = (STI.getSmVersion() >= 20);
-  bool hasImageHandles = STI.hasImageHandles();
 
   if (F->arg_empty() && !F->isVarArg()) {
     O << "()";
@@ -1533,25 +1531,30 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
     first = false;
 
     // Handle image/sampler parameters
-    if (isKernelFunction(*F)) {
+    if (isKernelFunc) {
       if (isSampler(*I) || isImage(*I)) {
+        std::string ParamSym;
+        raw_string_ostream ParamStr(ParamSym);
+        ParamStr << F->getName() << "_param_" << paramIndex;
+        ParamStr.flush();
+        bool EmitImagePtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
         if (isImage(*I)) {
           if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
-            if (hasImageHandles)
+            if (EmitImagePtr)
               O << "\t.param .u64 .ptr .surfref ";
             else
               O << "\t.param .surfref ";
             O << TLI->getParamName(F, paramIndex);
           }
           else { // Default image is read_only
-            if (hasImageHandles)
+            if (EmitImagePtr)
               O << "\t.param .u64 .ptr .texref ";
             else
               O << "\t.param .texref ";
             O << TLI->getParamName(F, paramIndex);
           }
         } else {
-          if (hasImageHandles)
+          if (EmitImagePtr)
             O << "\t.param .u64 .ptr .samplerref ";
           else
             O << "\t.param .samplerref ";
diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 77426f7f6da71e..6670cb296f2160 100644
--- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -14,13 +14,14 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
 class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
 private:
-  /// Stores a mapping from index to symbol name for removing image handles
-  /// on Fermi.
+  /// Stores a mapping from index to symbol name for image handles that are
+  /// replaced with image references.
   SmallVector<std::string, 8> ImageHandleList;
 
 public:
@@ -36,20 +37,27 @@ class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
   /// Returns the index for the symbol \p Symbol. If the symbol was previously,
   /// added, the same index is returned. Otherwise, the symbol is added and the
   /// new index is returned.
-  unsigned getImageHandleSymbolIndex(const char *Symbol) {
+  unsigned getImageHandleSymbolIndex(StringRef Symbol) {
     // Is the symbol already present?
     for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
-      if (ImageHandleList[i] == std::string(Symbol))
+      if (ImageHandleList[i] == Symbol)
         return i;
     // Nope, insert it
-    ImageHandleList.push_back(Symbol);
+    ImageHandleList.push_back(Symbol.str());
     return ImageHandleList.size()-1;
   }
 
   /// Returns the symbol name at the given index.
-  const char *getImageHandleSymbol(unsigned Idx) const {
+  StringRef getImageHandleSymbol(unsigned Idx) const {
     assert(ImageHandleList.size() > Idx && "Bad index");
-    return ImageHandleList[Idx].c_str();
+    return ImageHandleList[Idx];
+  }
+
+  /// Check if the symbol has a mapping. Having a mapping means the handle is
+  /// replaced with a reference.
+  bool checkImageHandleSymbol(StringRef Symbol) const {
+    return ImageHandleList.end() !=
+           std::find(ImageHandleList.begin(), ImageHandleList.end(), Symbol);
   }
 };
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index f66504b09cb63f..a3e3978cbbfe29 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1830,7 +1830,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
     NewSymStr << MF.getName() << "_param_" << Param;
 
     InstrsToRemove.insert(&TexHandleDef);
-    Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
+    Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str());
     return true;
   }
   case NVPTX::texsurf_handles: {
@@ -1839,7 +1839,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
     const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
     assert(GV->hasName() && "Global sampler must be named!");
     InstrsToRemove.insert(&TexHandleDef);
-    Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
+    Idx = MFI->getImageHandleSymbolIndex(GV->getName());
     return true;
   }
   case NVPTX::nvvm_move_i64:
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 0e6b75e622c6ad..abd7070ef0153b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -55,19 +55,9 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                                const std::string &FS,
                                const NVPTXTargetMachine &TM)
     : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
-      FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
+      FullSmVersion(200), SmVersion(getSmVersion()),
       TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}
 
-bool NVPTXSubtarget::hasImageHandles() const {
-  // Enable handles for Kepler+, where CUDA supports indirect surfaces and
-  // textures
-  if (TM.getDrvInterface() == NVPTX::CUDA)
-    return (SmVersion >= 30);
-
-  // Disabled, otherwise
-  return false;
-}
-
 bool NVPTXSubtarget::allowFP16Math() const {
   return hasFP16Math() && NoF16Math == false;
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e785bbf830da62..b90ebde8a0ba1d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -43,7 +43,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   // FullSmVersion.
   unsigned int SmVersion;
 
-  const NVPTXTargetMachine &TM;
   NVPTXInstrInfo InstrInfo;
   NVPTXTargetLowering TLInfo;
   SelectionDAGTargetInfo TSInfo;
@@ -81,7 +80,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
   bool hasLDG() const { return SmVersion >= 32; }
   bool hasHWROT32() const { return SmVersion >= 32; }
-  bool hasImageHandles() const;
   bool hasFP16Math() const { return SmVersion >= 53; }
   bool hasBF16Math() const { return SmVersion >= 80; }
   bool allowFP16Math() const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5c5e9420ee737..b3b2880588cc59 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -404,14 +404,10 @@ void NVPTXPassConfig::addIRPasses() {
 }
 
 bool NVPTXPassConfig::addInstSelector() {
-  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
-
   addPass(createLowerAggrCopies());
   addPass(createAllocaHoisting());
   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
-
-  if (!ST.hasImageHandles())
-    addPass(createNVPTXReplaceImageHandlesPass());
+  addPass(createNVPTXReplaceImageHandlesPass());
 
   return false;
 }
diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
index 811b396d9d0a29..30c3c0fc17c0a0 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -76,8 +76,7 @@ define void @bar(ptr %red, i32 %idx) {
 ; SM30-NEXT:    ld.param.u64 %rd1, [bar_param_0];
 ; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
 ; SM30-NEXT:    ld.param.u32 %r1, [bar_param_1];
-; SM30-NEXT:    mov.u64 %rd3, surf0;
-; SM30-NEXT:    suld.b.1d.b32.trap {%r2}, [%rd3, {%r1}];
+; SM30-NEXT:    suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
 ; SM30-NEXT:    cvt.rn.f32.s32 %f1, %r2;
 ; SM30-NEXT:    st.global.f32 [%rd2], %f1;
 ; SM30-NEXT:    ret;
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
index 7d86696087438b..9607a58856bac8 100644
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -1,12 +1,12 @@
 # RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
-# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
+# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
 # RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
 
 # We only need to run this second time for texture tests, because
 # there is a difference between unified and non-unified intrinsics.
 #
 # RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
-# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
+# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
 # RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
 
 # Verify that all instructions and intrinsics defined in TableGen
@@ -269,9 +269,7 @@ def gen_suld_tests(target, global_surf):
     ret void
   }
   ; CHECK-LABEL: .entry ${test_name}_global
-  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
-  ; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
-  ; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
+  ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
   define void @${test_name}_global(${retty}* %ret, ${access}) {
     %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
     %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
@@ -314,7 +312,6 @@ def gen_suld_tests(target, global_surf):
             "reg_ret": get_ptx_vec_reg(vec, dtype),
             "reg_surf": get_ptx_surface(target),
             "reg_access": get_ptx_surface_access(geom),
-            "reg_id": get_table_gen_id(),
         }
         gen_test(template, params)
         generated_items.append((params["intrinsic"], params["instruction"]))
@@ -364,9 +361,7 @@ def gen_sust_tests(target, global_surf):
     ret void
   }
   ; CHECK-LABEL: .entry ${test_name}_global
-  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
-  ; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
-  ; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
+  ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
   define void @${test_name}_global(${value}, ${access}) {
     %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
     tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
@@ -420,7 +415,6 @@ def gen_sust_tests(target, global_surf):
             "reg_value": get_ptx_vec_reg(vec, ctype),
             "reg_surf": get_ptx_surface(target),
             "reg_access": get_ptx_surface_access(geom),
-            "reg_id": get_table_gen_id(),
         }
         gen_test(template, params)
         generated_items.append((params["intrinsic"], params["instruction"]))
@@ -627,9 +621,7 @@ def gen_tex_tests(target, global_tex, global_sampler):
     ret void
   }
   ; CHECK-LABEL: .entry ${test_name}_global
-  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
-  ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
-  ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+  ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
   define void @${test_name}_global(${retty}* %ret, ${access}) {
     %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
     ${get_sampler_handle}
@@ -713,7 +705,6 @@ def gen_tex_tests(target, global_tex, global_sampler):
             "ptx_tex": get_ptx_texture(target),
             "ptx_access": get_ptx_texture_access(geom, ctype),
             "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
-            "reg_id": get_table_gen_id(),
         }
         gen_test(template, params)
         generated_items.append((params["intrinsic"], params["instruction"]))
@@ -814,9 +805,7 @@ def gen_tld4_tests(target, global_tex, global_sampler):
     ret void
   }
   ; CHECK-LABEL: .entry ${test_name}_global
-  ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
-  ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
-  ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+  ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
   define void @${test_name}_global(${retty}* %ret, ${access}) {
     %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
     ${get_sampler_handle}
@@ -862,7 +851,6 @@ def gen_tld4_tests(target, global_tex, global_sampler):
             "ptx_tex": get_ptx_texture(target),
             "ptx_access": get_ptx_tld4_access(geom),
             "ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
-            "reg_id": get_table_gen_id(),
         }
         gen_test(template, params)
         generated_items.append((params["intrinsic"], params["instruction"]))
diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
index b79632bf2e5330..d6f3956d68db45 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -62,8 +62,7 @@ define void @bar(i32 %val, i32 %idx) {
 ; SM30-NEXT:  // %bb.0:
 ; SM30-NEXT:    ld.param.u32 %r1, [bar_param_0];
 ; SM30-NEXT:    ld.param.u32 %r2, [bar_param_1];
-; SM30-NEXT:    mov.u64 %rd1, surf0;
-; SM30-NEXT:    sust.b.1d.b32.trap [%rd1, {%r2}], {%r1};
+; SM30-NEXT:    sust.b.1d.b32.trap [surf0, {%r2}], {%r1};
 ; SM30-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
   tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index 7ba31e306c37df..fa01e067cac723 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -74,8 +74,7 @@ define void @bar(ptr %red, i32 %idx) {
 ; SM30-NEXT:    ld.param.u64 %rd1, [bar_param_0];
 ; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
 ; SM30-NEXT:    ld.param.u32 %r1, [bar_param_1];
-; SM30-NEXT:    mov.u64 %rd3, tex0;
-; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
+; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
 ; SM30-NEXT:    st.global.f32 [%rd2], %f1;
 ; SM30-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
@@ -126,7 +125,7 @@ define void @baz(ptr %red, i32 %idx) {
 ; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
 ; SM30-NEXT:    ld.param.u32 %r1, [baz_param_1];
 ; SM30-NEXT:    mov.u64 %rd3, tex0;
-; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
+; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
 ; SM30-NEXT:    { // callseq 0, 0
 ; SM30-NEXT:    .param .b64 param0;
 ; SM30-NEXT:    st.param.b64 [param0], %rd3;
diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
index 7fa2b728e7383a..bcf78b6d9d3cde 100644
--- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
+++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
@@ -59,8 +59,7 @@ define i32 @t1() {
 ; SM30-NEXT:    .reg .b64 %rd<2>;
 ; SM30-EMPTY:
 ; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    mov.u64 %rd1, tex0;
-; SM30-NEXT:    txq.width.b32 %r1, [%rd1];
+; SM30-NEXT:    txq.width.b32 %r1, [tex0];
 ; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
 ; SM30-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
@@ -112,8 +111,7 @@ define i32 @t3() {
 ; SM30-NEXT:    .reg .b64 %rd<2>;
 ; SM30-EMPTY:
 ; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    mov.u64 %rd1, tex0;
-; SM30-NEXT:    txq.height.b32 %r1, [%rd1];
+; SM30-NEXT:    txq.height.b32 %r1, [tex0];
 ; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
 ; SM30-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
@@ -165,8 +163,7 @@ define i32 @s1() {
 ; SM30-NEXT:    .reg .b64 %rd<2>;
 ; SM30-EMPTY:
 ; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    mov.u64 %rd1, surf0;
-; SM30-NEXT:    suq.width.b32 %r1, [%rd1];
+; SM30-NEXT:    suq.width.b32 %r1, [surf0];
 ; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
 ; SM30-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
@@ -218,8 +215,7 @@ define i32 @s3() {
 ; SM30-NEXT:    .reg .b64 %rd<2>;
 ; SM30-EMPTY:
 ; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    mov.u64 %rd1, surf0;
-; SM30-NEXT:    suq.height.b32 %r1, [%rd1];
+; SM30-NEXT:    suq.height.b32 %r1, [surf0];
 ; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
 ; SM30-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)

>From 2cd7a6d61dafb9b7ea59a908c264724fab361ba6 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 12 Dec 2024 22:31:46 +0000
Subject: [PATCH 3/3] address comments - unify FileChecks

---
 llvm/test/CodeGen/NVPTX/surf-read-cuda.ll  |  93 +++-----
 llvm/test/CodeGen/NVPTX/surf-write-cuda.ll |  69 ++----
 llvm/test/CodeGen/NVPTX/tex-read-cuda.ll   | 166 +++++---------
 llvm/test/CodeGen/NVPTX/texsurf-queries.ll | 240 +++++++--------------
 4 files changed, 187 insertions(+), 381 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
index 30c3c0fc17c0a0..b3b67a507ade7e 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %}
 
@@ -11,37 +11,21 @@ declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
 define void @foo(i64 %img, ptr %red, i32 %idx) {
-; SM20-LABEL: foo(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<3>;
-; SM20-NEXT:    .reg .f32 %f<2>;
-; SM20-NEXT:    .reg .b64 %rd<4>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [foo_param_0];
-; SM20-NEXT:    ld.param.u64 %rd2, [foo_param_1];
-; SM20-NEXT:    cvta.to.global.u64 %rd3, %rd2;
-; SM20-NEXT:    ld.param.u32 %r1, [foo_param_2];
-; SM20-NEXT:    suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
-; SM20-NEXT:    cvt.rn.f32.s32 %f1, %r2;
-; SM20-NEXT:    st.global.f32 [%rd3], %f1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: foo(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<3>;
-; SM30-NEXT:    .reg .f32 %f<2>;
-; SM30-NEXT:    .reg .b64 %rd<4>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [foo_param_0];
-; SM30-NEXT:    ld.param.u64 %rd2, [foo_param_1];
-; SM30-NEXT:    cvta.to.global.u64 %rd3, %rd2;
-; SM30-NEXT:    ld.param.u32 %r1, [foo_param_2];
-; SM30-NEXT:    suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
-; SM30-NEXT:    cvt.rn.f32.s32 %f1, %r2;
-; SM30-NEXT:    st.global.f32 [%rd3], %f1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: foo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; CHECK-NEXT:    ld.param.u64 %rd2, [foo_param_1];
+; CHECK-NEXT:    cvta.to.global.u64 %rd3, %rd2;
+; CHECK-NEXT:    ld.param.u32 %r1, [foo_param_2];
+; CHECK-NEXT:    suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
+; CHECK-NEXT:    cvt.rn.f32.s32 %f1, %r2;
+; CHECK-NEXT:    st.global.f32 [%rd3], %f1;
+; CHECK-NEXT:    ret;
   %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
   %ret = sitofp i32 %val to float
   store float %ret, ptr %red
@@ -51,35 +35,20 @@ define void @foo(i64 %img, ptr %red, i32 %idx) {
 @surf0 = internal addrspace(1) global i64 0, align 8
 
 define void @bar(ptr %red, i32 %idx) {
-; SM20-LABEL: bar(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<3>;
-; SM20-NEXT:    .reg .f32 %f<2>;
-; SM20-NEXT:    .reg .b64 %rd<4>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [bar_param_0];
-; SM20-NEXT:    cvta.to.global.u64 %rd2, %rd1;
-; SM20-NEXT:    ld.param.u32 %r1, [bar_param_1];
-; SM20-NEXT:    suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
-; SM20-NEXT:    cvt.rn.f32.s32 %f1, %r2;
-; SM20-NEXT:    st.global.f32 [%rd2], %f1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: bar(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<3>;
-; SM30-NEXT:    .reg .f32 %f<2>;
-; SM30-NEXT:    .reg .b64 %rd<4>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [bar_param_0];
-; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
-; SM30-NEXT:    ld.param.u32 %r1, [bar_param_1];
-; SM30-NEXT:    suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
-; SM30-NEXT:    cvt.rn.f32.s32 %f1, %r2;
-; SM30-NEXT:    st.global.f32 [%rd2], %f1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: bar(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [bar_param_0];
+; CHECK-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; CHECK-NEXT:    ld.param.u32 %r1, [bar_param_1];
+; CHECK-NEXT:    suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
+; CHECK-NEXT:    cvt.rn.f32.s32 %f1, %r2;
+; CHECK-NEXT:    st.global.f32 [%rd2], %f1;
+; CHECK-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
   %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
   %ret = sitofp i32 %val to float
diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
index d6f3956d68db45..acb6076ced98c5 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %}
 
@@ -11,29 +11,17 @@ declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
 define void @foo(i64 %img, i32 %val, i32 %idx) {
-; SM20-LABEL: foo(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<3>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [foo_param_0];
-; SM20-NEXT:    ld.param.u32 %r1, [foo_param_1];
-; SM20-NEXT:    ld.param.u32 %r2, [foo_param_2];
-; SM20-NEXT:    sust.b.1d.b32.trap [%rd1, {%r2}], {%r1};
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: foo(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<3>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [foo_param_0];
-; SM30-NEXT:    ld.param.u32 %r1, [foo_param_1];
-; SM30-NEXT:    ld.param.u32 %r2, [foo_param_2];
-; SM30-NEXT:    sust.b.1d.b32.trap [%rd1, {%r2}], {%r1};
-; SM30-NEXT:    ret;
+; CHECK-LABEL: foo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; CHECK-NEXT:    ld.param.u32 %r1, [foo_param_1];
+; CHECK-NEXT:    ld.param.u32 %r2, [foo_param_2];
+; CHECK-NEXT:    sust.b.1d.b32.trap [%rd1, {%r2}], {%r1};
+; CHECK-NEXT:    ret;
   tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
   ret void
 }
@@ -43,27 +31,16 @@ define void @foo(i64 %img, i32 %val, i32 %idx) {
 
 
 define void @bar(i32 %val, i32 %idx) {
-; SM20-LABEL: bar(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<3>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u32 %r1, [bar_param_0];
-; SM20-NEXT:    ld.param.u32 %r2, [bar_param_1];
-; SM20-NEXT:    sust.b.1d.b32.trap [surf0, {%r2}], {%r1};
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: bar(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<3>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u32 %r1, [bar_param_0];
-; SM30-NEXT:    ld.param.u32 %r2, [bar_param_1];
-; SM30-NEXT:    sust.b.1d.b32.trap [surf0, {%r2}], {%r1};
-; SM30-NEXT:    ret;
+; CHECK-LABEL: bar(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [bar_param_0];
+; CHECK-NEXT:    ld.param.u32 %r2, [bar_param_1];
+; CHECK-NEXT:    sust.b.1d.b32.trap [surf0, {%r2}], {%r1};
+; CHECK-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
   tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index fa01e067cac723..6d42240b2031a3 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %}
 
@@ -11,35 +11,20 @@ declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64,
 declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 define void @foo(i64 %img, ptr %red, i32 %idx) {
-; SM20-LABEL: foo(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .f32 %f<5>;
-; SM20-NEXT:    .reg .b64 %rd<4>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [foo_param_0];
-; SM20-NEXT:    ld.param.u64 %rd2, [foo_param_1];
-; SM20-NEXT:    cvta.to.global.u64 %rd3, %rd2;
-; SM20-NEXT:    ld.param.u32 %r1, [foo_param_2];
-; SM20-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
-; SM20-NEXT:    st.global.f32 [%rd3], %f1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: foo(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .f32 %f<5>;
-; SM30-NEXT:    .reg .b64 %rd<4>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [foo_param_0];
-; SM30-NEXT:    ld.param.u64 %rd2, [foo_param_1];
-; SM30-NEXT:    cvta.to.global.u64 %rd3, %rd2;
-; SM30-NEXT:    ld.param.u32 %r1, [foo_param_2];
-; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
-; SM30-NEXT:    st.global.f32 [%rd3], %f1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: foo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<5>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; CHECK-NEXT:    ld.param.u64 %rd2, [foo_param_1];
+; CHECK-NEXT:    cvta.to.global.u64 %rd3, %rd2;
+; CHECK-NEXT:    ld.param.u32 %r1, [foo_param_2];
+; CHECK-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
+; CHECK-NEXT:    st.global.f32 [%rd3], %f1;
+; CHECK-NEXT:    ret;
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
   store float %ret, ptr %red
@@ -50,33 +35,19 @@ define void @foo(i64 %img, ptr %red, i32 %idx) {
 @tex0 = internal addrspace(1) global i64 0, align 8
 
 define void @bar(ptr %red, i32 %idx) {
-; SM20-LABEL: bar(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .f32 %f<5>;
-; SM20-NEXT:    .reg .b64 %rd<4>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [bar_param_0];
-; SM20-NEXT:    cvta.to.global.u64 %rd2, %rd1;
-; SM20-NEXT:    ld.param.u32 %r1, [bar_param_1];
-; SM20-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
-; SM20-NEXT:    st.global.f32 [%rd2], %f1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: bar(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .f32 %f<5>;
-; SM30-NEXT:    .reg .b64 %rd<4>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [bar_param_0];
-; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
-; SM30-NEXT:    ld.param.u32 %r1, [bar_param_1];
-; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
-; SM30-NEXT:    st.global.f32 [%rd2], %f1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: bar(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<5>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [bar_param_0];
+; CHECK-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; CHECK-NEXT:    ld.param.u32 %r1, [bar_param_1];
+; CHECK-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
+; CHECK-NEXT:    st.global.f32 [%rd2], %f1;
+; CHECK-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
@@ -87,59 +58,32 @@ define void @bar(ptr %red, i32 %idx) {
 declare float @texfunc(i64)
 
 define void @baz(ptr %red, i32 %idx) {
-; SM20-LABEL: baz(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .f32 %f<8>;
-; SM20-NEXT:    .reg .b64 %rd<4>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [baz_param_0];
-; SM20-NEXT:    cvta.to.global.u64 %rd2, %rd1;
-; SM20-NEXT:    ld.param.u32 %r1, [baz_param_1];
-; SM20-NEXT:    mov.u64 %rd3, tex0;
-; SM20-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
-; SM20-NEXT:    { // callseq 0, 0
-; SM20-NEXT:    .param .b64 param0;
-; SM20-NEXT:    st.param.b64 [param0], %rd3;
-; SM20-NEXT:    .param .b32 retval0;
-; SM20-NEXT:    call.uni (retval0),
-; SM20-NEXT:    texfunc,
-; SM20-NEXT:    (
-; SM20-NEXT:    param0
-; SM20-NEXT:    );
-; SM20-NEXT:    ld.param.f32 %f5, [retval0];
-; SM20-NEXT:    } // callseq 0
-; SM20-NEXT:    add.rn.f32 %f7, %f1, %f5;
-; SM20-NEXT:    st.global.f32 [%rd2], %f7;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: baz(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .f32 %f<8>;
-; SM30-NEXT:    .reg .b64 %rd<4>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [baz_param_0];
-; SM30-NEXT:    cvta.to.global.u64 %rd2, %rd1;
-; SM30-NEXT:    ld.param.u32 %r1, [baz_param_1];
-; SM30-NEXT:    mov.u64 %rd3, tex0;
-; SM30-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
-; SM30-NEXT:    { // callseq 0, 0
-; SM30-NEXT:    .param .b64 param0;
-; SM30-NEXT:    st.param.b64 [param0], %rd3;
-; SM30-NEXT:    .param .b32 retval0;
-; SM30-NEXT:    call.uni (retval0),
-; SM30-NEXT:    texfunc,
-; SM30-NEXT:    (
-; SM30-NEXT:    param0
-; SM30-NEXT:    );
-; SM30-NEXT:    ld.param.f32 %f5, [retval0];
-; SM30-NEXT:    } // callseq 0
-; SM30-NEXT:    add.rn.f32 %f7, %f1, %f5;
-; SM30-NEXT:    st.global.f32 [%rd2], %f7;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: baz(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .f32 %f<8>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [baz_param_0];
+; CHECK-NEXT:    cvta.to.global.u64 %rd2, %rd1;
+; CHECK-NEXT:    ld.param.u32 %r1, [baz_param_1];
+; CHECK-NEXT:    mov.u64 %rd3, tex0;
+; CHECK-NEXT:    tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .b64 param0;
+; CHECK-NEXT:    st.param.b64 [param0], %rd3;
+; CHECK-NEXT:    .param .b32 retval0;
+; CHECK-NEXT:    call.uni (retval0),
+; CHECK-NEXT:    texfunc,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    ld.param.f32 %f5, [retval0];
+; CHECK-NEXT:    } // callseq 0
+; CHECK-NEXT:    add.rn.f32 %f7, %f1, %f5;
+; CHECK-NEXT:    st.global.f32 [%rd2], %f7;
+; CHECK-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
index bcf78b6d9d3cde..57393a13466b90 100644
--- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
+++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %}
 
@@ -17,51 +17,30 @@ declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
 define i32 @t0(i64 %texHandle) {
-; SM20-LABEL: t0(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [t0_param_0];
-; SM20-NEXT:    txq.width.b32 %r1, [%rd1];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: t0(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [t0_param_0];
-; SM30-NEXT:    txq.width.b32 %r1, [%rd1];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: t0(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [t0_param_0];
+; CHECK-NEXT:    txq.width.b32 %r1, [%rd1];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle)
   ret i32 %width
 }
 
 define i32 @t1() {
-; SM20-LABEL: t1(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    txq.width.b32 %r1, [tex0];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: t1(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    txq.width.b32 %r1, [tex0];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: t1(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    txq.width.b32 %r1, [tex0];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
   %width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle)
   ret i32 %width
@@ -69,51 +48,30 @@ define i32 @t1() {
 
 
 define i32 @t2(i64 %texHandle) {
-; SM20-LABEL: t2(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [t2_param_0];
-; SM20-NEXT:    txq.height.b32 %r1, [%rd1];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: t2(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [t2_param_0];
-; SM30-NEXT:    txq.height.b32 %r1, [%rd1];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: t2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [t2_param_0];
+; CHECK-NEXT:    txq.height.b32 %r1, [%rd1];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle)
   ret i32 %height
 }
 
 define i32 @t3() {
-; SM20-LABEL: t3(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    txq.height.b32 %r1, [tex0];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: t3(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    txq.height.b32 %r1, [tex0];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: t3(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    txq.height.b32 %r1, [tex0];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
   %height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle)
   ret i32 %height
@@ -121,51 +79,30 @@ define i32 @t3() {
 
 
 define i32 @s0(i64 %surfHandle) {
-; SM20-LABEL: s0(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [s0_param_0];
-; SM20-NEXT:    suq.width.b32 %r1, [%rd1];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: s0(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [s0_param_0];
-; SM30-NEXT:    suq.width.b32 %r1, [%rd1];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: s0(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [s0_param_0];
+; CHECK-NEXT:    suq.width.b32 %r1, [%rd1];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle)
   ret i32 %width
 }
 
 define i32 @s1() {
-; SM20-LABEL: s1(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    suq.width.b32 %r1, [surf0];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: s1(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    suq.width.b32 %r1, [surf0];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: s1(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    suq.width.b32 %r1, [surf0];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
   %width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle)
   ret i32 %width
@@ -173,51 +110,30 @@ define i32 @s1() {
 
 
 define i32 @s2(i64 %surfHandle) {
-; SM20-LABEL: s2(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    ld.param.u64 %rd1, [s2_param_0];
-; SM20-NEXT:    suq.height.b32 %r1, [%rd1];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: s2(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    ld.param.u64 %rd1, [s2_param_0];
-; SM30-NEXT:    suq.height.b32 %r1, [%rd1];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: s2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [s2_param_0];
+; CHECK-NEXT:    suq.height.b32 %r1, [%rd1];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle)
   ret i32 %height
 }
 
 define i32 @s3() {
-; SM20-LABEL: s3(
-; SM20:       {
-; SM20-NEXT:    .reg .b32 %r<2>;
-; SM20-NEXT:    .reg .b64 %rd<2>;
-; SM20-EMPTY:
-; SM20-NEXT:  // %bb.0:
-; SM20-NEXT:    suq.height.b32 %r1, [surf0];
-; SM20-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM20-NEXT:    ret;
-;
-; SM30-LABEL: s3(
-; SM30:       {
-; SM30-NEXT:    .reg .b32 %r<2>;
-; SM30-NEXT:    .reg .b64 %rd<2>;
-; SM30-EMPTY:
-; SM30-NEXT:  // %bb.0:
-; SM30-NEXT:    suq.height.b32 %r1, [surf0];
-; SM30-NEXT:    st.param.b32 [func_retval0], %r1;
-; SM30-NEXT:    ret;
+; CHECK-LABEL: s3(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    suq.height.b32 %r1, [surf0];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
   %height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle)
   ret i32 %height



More information about the llvm-commits mailing list