[libc] [llvm] [NVPTX] Fix internal indirect call prototypes not obeying the ABI (PR #100131)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 07:39:32 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
@llvm/pr-subscribers-backend-nvptx
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
The NVPTX backend optimizes the ABI for functions that are internal,
however, this is not legal for indirect call prototypes. Previously, we
would modify the ABI on an aggregate byval type passed to an indirect
call prototype, which would make PTXAS error. This patch just passes the
function as a nullptr to force strict ABI compliance without
modification in the helper function.
Fixes https://github.com/llvm/llvm-project/issues/100055
---
Full diff: https://github.com/llvm/llvm-project/pull/100131.diff
3 Files Affected:
- (modified) libc/config/gpu/entrypoints.txt (+4-11)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+2-2)
- (added) llvm/test/CodeGen/NVPTX/indirect_byval.ll (+94)
``````````diff
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 42909cec55890..fa878d8999227 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -1,13 +1,3 @@
-if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
- set(extra_entrypoints
- # stdio.h entrypoints
- libc.src.stdio.snprintf
- libc.src.stdio.sprintf
- libc.src.stdio.vsnprintf
- libc.src.stdio.vsprintf
- )
-endif()
-
set(TARGET_LIBC_ENTRYPOINTS
# assert.h entrypoints
libc.src.assert.__assert_fail
@@ -186,13 +176,16 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.errno.errno
# stdio.h entrypoints
- ${extra_entrypoints}
libc.src.stdio.clearerr
libc.src.stdio.fclose
libc.src.stdio.printf
libc.src.stdio.vprintf
libc.src.stdio.fprintf
libc.src.stdio.vfprintf
+ libc.src.stdio.snprintf
+ libc.src.stdio.sprintf
+ libc.src.stdio.vsnprintf
+ libc.src.stdio.vsprintf
libc.src.stdio.feof
libc.src.stdio.ferror
libc.src.stdio.fflush
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 44c1a2e50486c..bcd2ee2f4fc1e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1429,7 +1429,6 @@ std::string NVPTXTargetLowering::getPrototype(
bool first = true;
- const Function *F = CB.getFunction();
unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
Type *Ty = Args[i].Ty;
@@ -1471,10 +1470,11 @@ std::string NVPTXTargetLowering::getPrototype(
continue;
}
+ // Indirect calls need strict ABI alignment so we disable optimizations.
Type *ETy = Args[i].IndirectType;
Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
Align ParamByValAlign =
- getFunctionByValParamAlign(F, ETy, InitialAlign, DL);
+ getFunctionByValParamAlign(/*F=*/nullptr, ETy, InitialAlign, DL);
O << ".param .align " << ParamByValAlign.value() << " .b8 ";
O << "_";
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
new file mode 100644
index 0000000000000..ac6c4e262fd60
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+%struct.S = type { i8 }
+%struct.U = type { i64 }
+
+ at ptr = external global ptr, align 8
+
+define internal i32 @foo() {
+; CHECK-LABEL: foo(
+; CHECK: {
+; CHECK-NEXT: .local .align 1 .b8 __local_depot0[2];
+; CHECK-NEXT: .reg .b64 %SP;
+; CHECK-NEXT: .reg .b64 %SPL;
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: mov.u64 %SPL, __local_depot0;
+; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT: ld.global.u64 %rd1, [ptr];
+; CHECK-NEXT: ld.u8 %rs1, [%SP+1];
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .align 1 .b8 param0[1];
+; CHECK-NEXT: st.param.b8 [param0+0], %rs1;
+; CHECK-NEXT: .param .b64 param1;
+; CHECK-NEXT: st.param.b64 [param1+0], %rd2;
+; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd1,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_0;
+; CHECK-NEXT: ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+entry:
+ %s = alloca %struct.S, align 1
+ %agg.tmp = alloca %struct.S, align 1
+ %0 = load ptr, ptr @ptr, align 8
+ %call = call i32 %0(ptr byval(%struct.S) align 1 %agg.tmp, ptr noundef %s)
+ ret i32 %call
+}
+
+define internal i32 @bar() {
+; CHECK-LABEL: bar(
+; CHECK: // @bar
+; CHECK-NEXT: {
+; CHECK-NEXT: .local .align 8 .b8 __local_depot1[16];
+; CHECK-NEXT: .reg .b64 %SP;
+; CHECK-NEXT: .reg .b64 %SPL;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: mov.u64 %SPL, __local_depot1;
+; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT: ld.global.u64 %rd1, [ptr];
+; CHECK-NEXT: ld.u64 %rd2, [%SP+8];
+; CHECK-NEXT: add.u64 %rd3, %SP, 0;
+; CHECK-NEXT: { // callseq 1, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[8];
+; CHECK-NEXT: st.param.b64 [param0+0], %rd2;
+; CHECK-NEXT: .param .b64 param1;
+; CHECK-NEXT: st.param.b64 [param1+0], %rd3;
+; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd1,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_1;
+; CHECK-NEXT: ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT: } // callseq 1
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+entry:
+ %s = alloca %struct.U, align 8
+ %agg.tmp = alloca %struct.U, align 8
+ %0 = load ptr, ptr @ptr, align 8
+ %call = call noundef i32 %0(ptr byval(%struct.U) align 8 %agg.tmp, ptr %s)
+ ret i32 %call
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/100131
More information about the llvm-commits
mailing list