[llvm] 69e73d0 - [NVPTX] Fix pointer argument declaration for --nvptx-short-ptr
Andrew Savonichev via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 10:52:52 PST 2022
Author: Andrew Savonichev
Date: 2022-11-15T21:41:33+03:00
New Revision: 69e73d076b5711e2d19878b52014817b16ce8c55
URL: https://github.com/llvm/llvm-project/commit/69e73d076b5711e2d19878b52014817b16ce8c55
DIFF: https://github.com/llvm/llvm-project/commit/69e73d076b5711e2d19878b52014817b16ce8c55.diff
LOG: [NVPTX] Fix pointer argument declaration for --nvptx-short-ptr
When --nvptx-short-ptr is set, local pointers are stored as 32-bit values on
the nvptx64 target.
Before this patch, pointer arguments in a function declaration were always
emitted as b64 regardless of their address space, while the corresponding
call sequence declared and stored short pointers as b32:
.extern .func test
(
.param .b64 test_param_0
)
[...]
.param .b32 param0;
st.param.b32 [param0+0], %r1;
call.uni test, (param0);
ptxas rejects this mismatch with the following error:
ptxas: Type of argument does not match formal parameter 'test_param_0'
Now short pointers in a function declaration are emitted as b32 if
--nvptx-short-ptr is set.
Differential Revision: https://reviews.llvm.org/D135674
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
llvm/test/CodeGen/NVPTX/short-ptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index f3a49fb683d6..1326b60beeb9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1465,7 +1465,6 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
bool isKernelFunc = isKernelFunction(*F);
bool isABI = (STI.getSmVersion() >= 20);
bool hasImageHandles = STI.hasImageHandles();
- MVT thePointerTy = TLI->getPointerTy(DL);
if (F->arg_empty()) {
O << "()\n";
@@ -1538,10 +1537,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
// Just a scalar
auto *PTy = dyn_cast<PointerType>(Ty);
+ unsigned PTySizeInBits = 0;
+ if (PTy) {
+ PTySizeInBits =
+ TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
+ assert(PTySizeInBits && "Invalid pointer size");
+ }
+
if (isKernelFunc) {
if (PTy) {
// Special handling for pointer arguments to kernel
- O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
+ O << "\t.param .u" << PTySizeInBits << " ";
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
NVPTX::CUDA) {
@@ -1584,9 +1590,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
sz = promoteScalarArgumentSize(sz);
- } else if (isa<PointerType>(Ty))
- sz = thePointerTy.getSizeInBits();
- else if (Ty->isHalfTy())
+ } else if (PTy) {
+ assert(PTySizeInBits && "Invalid pointer size");
+ sz = PTySizeInBits;
+ } else if (Ty->isHalfTy())
// PTX ABI requires all scalar parameters to be at least 32
// bits in size. fp16 normally uses .b16 as its storage type
// in PTX, so its size must be adjusted here, too.
diff --git a/llvm/test/CodeGen/NVPTX/short-ptr.ll b/llvm/test/CodeGen/NVPTX/short-ptr.ll
index 243c0c184788..22958391c633 100644
--- a/llvm/test/CodeGen/NVPTX/short-ptr.ll
+++ b/llvm/test/CodeGen/NVPTX/short-ptr.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix CHECK-DEFAULT
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix CHECK-DEFAULT-32
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | FileCheck %s --check-prefixes CHECK-SHORT-SHARED,CHECK-SHORT-CONST
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-short-ptr | FileCheck %s --check-prefixes CHECK-SHORT-SHARED,CHECK-SHORT-CONST,CHECK-SHORT-LOCAL
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
@@ -15,3 +15,30 @@
; CHECK-DEFAULT-32: .visible .const .align 8 .u32 c
; CHECK-SHORT-CONST: .visible .const .align 8 .u32 c
@c = local_unnamed_addr addrspace(4) global i32 addrspace(4)* null, align 8
+
+declare void @use(i8 %arg);
+
+; CHECK-DEFAULT: .param .b64 test1_param_0
+; CHECK-DEFAULT-32: .param .b32 test1_param_0
+; CHECK-SHORT-LOCAL: .param .b32 test1_param_0
+define void @test1(i8 addrspace(5)* %local) {
+ ; CHECK-DEFAULT: ld.param.u64 %rd{{.*}}, [test1_param_0];
+ ; CHECK-DEFAULT-32: ld.param.u32 %r{{.*}}, [test1_param_0];
+ ; CHECK-SHORT-LOCAL: ld.param.u32 %r{{.*}}, [test1_param_0];
+ %v = load i8, i8 addrspace(5)* %local
+ call void @use(i8 %v)
+ ret void
+}
+
+define void @test2() {
+ %v = alloca i8
+ %cast = addrspacecast i8* %v to i8 addrspace(5)*
+ ; CHECK-DEFAULT: .param .b64 param0;
+ ; CHECK-DEFAULT: st.param.b64
+ ; CHECK-DEFAULT-32: .param .b32 param0;
+ ; CHECK-DEFAULT-32: st.param.b32
+ ; CHECK-SHORT-LOCAL: .param .b32 param0;
+ ; CHECK-SHORT-LOCAL: st.param.b32
+ call void @test1(i8 addrspace(5)* %cast)
+ ret void
+}
More information about the llvm-commits mailing list