[llvm] [NVPTX] Aggressively try to replace image handles with references (PR #119730)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 12 10:02:33 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
Even in cases where handles are supported, references are still preferable for performance. This is because, a ref uses one
less register and can avoid the handle creating code associated with taking the address of a tex/surf/sampler.
---
Patch is 37.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119730.diff
11 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp (+15-12)
- (modified) llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h (+15-7)
- (modified) llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp (+2-2)
- (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp (+1-11)
- (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.h (-2)
- (modified) llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp (+1-5)
- (modified) llvm/test/CodeGen/NVPTX/surf-read-cuda.ll (+61-19)
- (modified) llvm/test/CodeGen/NVPTX/surf-tex.py (+6-18)
- (modified) llvm/test/CodeGen/NVPTX/surf-write-cuda.ll (+45-12)
- (modified) llvm/test/CodeGen/NVPTX/tex-read-cuda.ll (+110-30)
- (modified) llvm/test/CodeGen/NVPTX/texsurf-queries.ll (+161-36)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 7cac4d787778f2..cb756246b8d116 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -209,7 +209,7 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
TargetMachine &TM = const_cast<TargetMachine &>(MF->getTarget());
NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine &>(TM);
const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
- const char *Sym = MFI->getImageHandleSymbol(Index);
+ StringRef Sym = MFI->getImageHandleSymbol(Index);
StringRef SymName = nvTM.getStrPool().save(Sym);
MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
}
@@ -224,16 +224,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
return;
}
- const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (!STI.hasImageHandles()) {
- if (lowerImageHandleOperand(MI, i, MCOp)) {
- OutMI.addOperand(MCOp);
- continue;
- }
+ if (lowerImageHandleOperand(MI, i, MCOp)) {
+ OutMI.addOperand(MCOp);
+ continue;
}
if (lowerOperand(MO, MCOp))
@@ -1509,13 +1506,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const AttributeList &PAL = F->getAttributes();
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
+ const NVPTXMachineFunctionInfo *MFI =
+ MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = isKernelFunction(*F);
bool isABI = (STI.getSmVersion() >= 20);
- bool hasImageHandles = STI.hasImageHandles();
if (F->arg_empty() && !F->isVarArg()) {
O << "()";
@@ -1533,25 +1531,30 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
first = false;
// Handle image/sampler parameters
- if (isKernelFunction(*F)) {
+ if (isKernelFunc) {
if (isSampler(*I) || isImage(*I)) {
+ std::string ParamSym;
+ raw_string_ostream ParamStr(ParamSym);
+ ParamStr << F->getName() << "_param_" << paramIndex;
+ ParamStr.flush();
+ bool EmitImagePtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
if (isImage(*I)) {
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (hasImageHandles)
+ if (EmitImagePtr)
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
O << TLI->getParamName(F, paramIndex);
}
else { // Default image is read_only
- if (hasImageHandles)
+ if (EmitImagePtr)
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
O << TLI->getParamName(F, paramIndex);
}
} else {
- if (hasImageHandles)
+ if (EmitImagePtr)
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 77426f7f6da71e..6670cb296f2160 100644
--- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -14,13 +14,14 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
private:
- /// Stores a mapping from index to symbol name for removing image handles
- /// on Fermi.
+ /// Stores a mapping from index to symbol name for image handles that are
+ /// replaced with image references
SmallVector<std::string, 8> ImageHandleList;
public:
@@ -36,20 +37,27 @@ class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
/// Returns the index for the symbol \p Symbol. If the symbol was previously,
/// added, the same index is returned. Otherwise, the symbol is added and the
/// new index is returned.
- unsigned getImageHandleSymbolIndex(const char *Symbol) {
+ unsigned getImageHandleSymbolIndex(StringRef Symbol) {
// Is the symbol already present?
for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
- if (ImageHandleList[i] == std::string(Symbol))
+ if (ImageHandleList[i] == Symbol)
return i;
// Nope, insert it
- ImageHandleList.push_back(Symbol);
+ ImageHandleList.push_back(Symbol.str());
return ImageHandleList.size()-1;
}
/// Returns the symbol name at the given index.
- const char *getImageHandleSymbol(unsigned Idx) const {
+ StringRef getImageHandleSymbol(unsigned Idx) const {
assert(ImageHandleList.size() > Idx && "Bad index");
- return ImageHandleList[Idx].c_str();
+ return ImageHandleList[Idx];
+ }
+
+ /// Check if the symbol has a mapping. Having a mapping means the handle is
+ /// replaced with a reference
+ bool checkImageHandleSymbol(StringRef Symbol) const {
+ return ImageHandleList.end() !=
+ std::find(ImageHandleList.begin(), ImageHandleList.end(), Symbol);
}
};
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index f66504b09cb63f..a3e3978cbbfe29 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1830,7 +1830,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
NewSymStr << MF.getName() << "_param_" << Param;
InstrsToRemove.insert(&TexHandleDef);
- Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
+ Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str());
return true;
}
case NVPTX::texsurf_handles: {
@@ -1839,7 +1839,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
assert(GV->hasName() && "Global sampler must be named!");
InstrsToRemove.insert(&TexHandleDef);
- Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
+ Idx = MFI->getImageHandleSymbolIndex(GV->getName());
return true;
}
case NVPTX::nvvm_move_i64:
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 0e6b75e622c6ad..abd7070ef0153b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -55,19 +55,9 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
- FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
+ FullSmVersion(200), SmVersion(getSmVersion()),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}
-bool NVPTXSubtarget::hasImageHandles() const {
- // Enable handles for Kepler+, where CUDA supports indirect surfaces and
- // textures
- if (TM.getDrvInterface() == NVPTX::CUDA)
- return (SmVersion >= 30);
-
- // Disabled, otherwise
- return false;
-}
-
bool NVPTXSubtarget::allowFP16Math() const {
return hasFP16Math() && NoF16Math == false;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e785bbf830da62..b90ebde8a0ba1d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -43,7 +43,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// FullSmVersion.
unsigned int SmVersion;
- const NVPTXTargetMachine &TM;
NVPTXInstrInfo InstrInfo;
NVPTXTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
@@ -81,7 +80,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
bool hasLDG() const { return SmVersion >= 32; }
bool hasHWROT32() const { return SmVersion >= 32; }
- bool hasImageHandles() const;
bool hasFP16Math() const { return SmVersion >= 53; }
bool hasBF16Math() const { return SmVersion >= 80; }
bool allowFP16Math() const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5c5e9420ee737..b3b2880588cc59 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -404,14 +404,10 @@ void NVPTXPassConfig::addIRPasses() {
}
bool NVPTXPassConfig::addInstSelector() {
- const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
-
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
-
- if (!ST.hasImageHandles())
- addPass(createNVPTXReplaceImageHandlesPass());
+ addPass(createNVPTXReplaceImageHandlesPass());
return false;
}
diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
index a4ab7892469163..30c3c0fc17c0a0 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -9,38 +10,79 @@ declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
-; SM20-LABEL: .entry foo
-; SM30-LABEL: .entry foo
define void @foo(i64 %img, ptr %red, i32 %idx) {
-; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
-; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
-; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
-; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
+; SM20-LABEL: foo(
+; SM20: {
+; SM20-NEXT: .reg .b32 %r<3>;
+; SM20-NEXT: .reg .f32 %f<2>;
+; SM20-NEXT: .reg .b64 %rd<4>;
+; SM20-EMPTY:
+; SM20-NEXT: // %bb.0:
+; SM20-NEXT: ld.param.u64 %rd1, [foo_param_0];
+; SM20-NEXT: ld.param.u64 %rd2, [foo_param_1];
+; SM20-NEXT: cvta.to.global.u64 %rd3, %rd2;
+; SM20-NEXT: ld.param.u32 %r1, [foo_param_2];
+; SM20-NEXT: suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
+; SM20-NEXT: cvt.rn.f32.s32 %f1, %r2;
+; SM20-NEXT: st.global.f32 [%rd3], %f1;
+; SM20-NEXT: ret;
+;
+; SM30-LABEL: foo(
+; SM30: {
+; SM30-NEXT: .reg .b32 %r<3>;
+; SM30-NEXT: .reg .f32 %f<2>;
+; SM30-NEXT: .reg .b64 %rd<4>;
+; SM30-EMPTY:
+; SM30-NEXT: // %bb.0:
+; SM30-NEXT: ld.param.u64 %rd1, [foo_param_0];
+; SM30-NEXT: ld.param.u64 %rd2, [foo_param_1];
+; SM30-NEXT: cvta.to.global.u64 %rd3, %rd2;
+; SM30-NEXT: ld.param.u32 %r1, [foo_param_2];
+; SM30-NEXT: suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
+; SM30-NEXT: cvt.rn.f32.s32 %f1, %r2;
+; SM30-NEXT: st.global.f32 [%rd3], %f1;
+; SM30-NEXT: ret;
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
-; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
-; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
-; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
-; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
store float %ret, ptr %red
ret void
}
@surf0 = internal addrspace(1) global i64 0, align 8
-; SM20-LABEL: .entry bar
-; SM30-LABEL: .entry bar
define void @bar(ptr %red, i32 %idx) {
-; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
+; SM20-LABEL: bar(
+; SM20: {
+; SM20-NEXT: .reg .b32 %r<3>;
+; SM20-NEXT: .reg .f32 %f<2>;
+; SM20-NEXT: .reg .b64 %rd<4>;
+; SM20-EMPTY:
+; SM20-NEXT: // %bb.0:
+; SM20-NEXT: ld.param.u64 %rd1, [bar_param_0];
+; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1;
+; SM20-NEXT: ld.param.u32 %r1, [bar_param_1];
+; SM20-NEXT: suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
+; SM20-NEXT: cvt.rn.f32.s32 %f1, %r2;
+; SM20-NEXT: st.global.f32 [%rd2], %f1;
+; SM20-NEXT: ret;
+;
+; SM30-LABEL: bar(
+; SM30: {
+; SM30-NEXT: .reg .b32 %r<3>;
+; SM30-NEXT: .reg .f32 %f<2>;
+; SM30-NEXT: .reg .b64 %rd<4>;
+; SM30-EMPTY:
+; SM30-NEXT: // %bb.0:
+; SM30-NEXT: ld.param.u64 %rd1, [bar_param_0];
+; SM30-NEXT: cvta.to.global.u64 %rd2, %rd1;
+; SM30-NEXT: ld.param.u32 %r1, [bar_param_1];
+; SM30-NEXT: suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
+; SM30-NEXT: cvt.rn.f32.s32 %f1, %r2;
+; SM30-NEXT: st.global.f32 [%rd2], %f1;
+; SM30-NEXT: ret;
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
-; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
-; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
-; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
-; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
-; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
-; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
store float %ret, ptr %red
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
index 7d86696087438b..9607a58856bac8 100644
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -1,12 +1,12 @@
# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
-# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
+# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
# RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
# We only need to run this second time for texture tests, because
# there is a difference between unified and non-unified intrinsics.
#
# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
-# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
+# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
# Verify that all instructions and intrinsics defined in TableGen
@@ -269,9 +269,7 @@ def gen_suld_tests(target, global_surf):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
- ; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
- ; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
+ ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
define void @${test_name}_global(${retty}* %ret, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
%val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
@@ -314,7 +312,6 @@ def gen_suld_tests(target, global_surf):
"reg_ret": get_ptx_vec_reg(vec, dtype),
"reg_surf": get_ptx_surface(target),
"reg_access": get_ptx_surface_access(geom),
- "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -364,9 +361,7 @@ def gen_sust_tests(target, global_surf):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
- ; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
- ; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
+ ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
define void @${test_name}_global(${value}, ${access}) {
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
@@ -420,7 +415,6 @@ def gen_sust_tests(target, global_surf):
"reg_value": get_ptx_vec_reg(vec, ctype),
"reg_surf": get_ptx_surface(target),
"reg_access": get_ptx_surface_access(geom),
- "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -627,9 +621,7 @@ def gen_tex_tests(target, global_tex, global_sampler):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
- ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
- ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+ ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
define void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
@@ -713,7 +705,6 @@ def gen_tex_tests(target, global_tex, global_sampler):
"ptx_tex": get_ptx_texture(target),
"ptx_access": get_ptx_texture_access(geom, ctype),
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
- "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -814,9 +805,7 @@ def gen_tld4_tests(target, global_tex, global_sampler):
ret void
}
; CHECK-LABEL: .entry ${test_name}_global
- ; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
- ; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
- ; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
+ ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
define void @${test_name}_global(${retty}* %ret, ${access}) {
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
${get_sampler_handle}
@@ -862,7 +851,6 @@ def gen_tld4_tests(target, global_tex, global_sampler):
"ptx_tex": get_ptx_texture(target),
"ptx_access": get_ptx_tld4_access(geom),
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
- "reg_id": get_table_gen_id(),
}
gen_test(template, params)
generated_items.append((params["intrinsic"], params["instruction"]))
diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
index 9d840ce24e7af8..d6f3956d68db45 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -9,13 +10,30 @@ declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
-; SM20-LABEL: .entry foo
-; SM30-LABEL: .entry foo
define void @foo(i64 %img, i32 %val, i32 %idx) {
-; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
-; SM20: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}],...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/119730
More information about the llvm-commits
mailing list