[llvm-branch-commits] [libc] [llvm] release/19.x: [NVPTX] Fix internal indirect call prototypes not obeying the ABI (#100131) (PR #100174)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jul 23 11:02:04 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: None (llvmbot)
<details>
<summary>Changes</summary>
Backport e0649a5dfc6b859d652318f578bc3d49674787a4
Requested by: @<!-- -->jhuber6
---
Full diff: https://github.com/llvm/llvm-project/pull/100174.diff
3 Files Affected:
- (modified) libc/config/gpu/entrypoints.txt (+4-11)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+3-2)
- (added) llvm/test/CodeGen/NVPTX/indirect_byval.ll (+94)
``````````diff
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 42909cec55890..fa878d8999227 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -1,13 +1,3 @@
-if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
- set(extra_entrypoints
- # stdio.h entrypoints
- libc.src.stdio.snprintf
- libc.src.stdio.sprintf
- libc.src.stdio.vsnprintf
- libc.src.stdio.vsprintf
- )
-endif()
-
set(TARGET_LIBC_ENTRYPOINTS
# assert.h entrypoints
libc.src.assert.__assert_fail
@@ -186,13 +176,16 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.errno.errno
# stdio.h entrypoints
- ${extra_entrypoints}
libc.src.stdio.clearerr
libc.src.stdio.fclose
libc.src.stdio.printf
libc.src.stdio.vprintf
libc.src.stdio.fprintf
libc.src.stdio.vfprintf
+ libc.src.stdio.snprintf
+ libc.src.stdio.sprintf
+ libc.src.stdio.vsnprintf
+ libc.src.stdio.vsprintf
libc.src.stdio.feof
libc.src.stdio.ferror
libc.src.stdio.fflush
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 44c1a2e50486c..6975412ce5d35 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1429,7 +1429,6 @@ std::string NVPTXTargetLowering::getPrototype(
bool first = true;
- const Function *F = CB.getFunction();
unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
Type *Ty = Args[i].Ty;
@@ -1471,10 +1470,12 @@ std::string NVPTXTargetLowering::getPrototype(
continue;
}
+ // Indirect calls need strict ABI alignment so we disable optimizations by
+ // not providing a function to optimize.
Type *ETy = Args[i].IndirectType;
Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
Align ParamByValAlign =
- getFunctionByValParamAlign(F, ETy, InitialAlign, DL);
+ getFunctionByValParamAlign(/*F=*/nullptr, ETy, InitialAlign, DL);
O << ".param .align " << ParamByValAlign.value() << " .b8 ";
O << "_";
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
new file mode 100644
index 0000000000000..ac6c4e262fd60
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+%struct.S = type { i8 }
+%struct.U = type { i64 }
+
+@ptr = external global ptr, align 8
+
+define internal i32 @foo() {
+; CHECK-LABEL: foo(
+; CHECK: {
+; CHECK-NEXT: .local .align 1 .b8 __local_depot0[2];
+; CHECK-NEXT: .reg .b64 %SP;
+; CHECK-NEXT: .reg .b64 %SPL;
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: mov.u64 %SPL, __local_depot0;
+; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT: ld.global.u64 %rd1, [ptr];
+; CHECK-NEXT: ld.u8 %rs1, [%SP+1];
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .align 1 .b8 param0[1];
+; CHECK-NEXT: st.param.b8 [param0+0], %rs1;
+; CHECK-NEXT: .param .b64 param1;
+; CHECK-NEXT: st.param.b64 [param1+0], %rd2;
+; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd1,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_0;
+; CHECK-NEXT: ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+entry:
+ %s = alloca %struct.S, align 1
+ %agg.tmp = alloca %struct.S, align 1
+ %0 = load ptr, ptr @ptr, align 8
+ %call = call i32 %0(ptr byval(%struct.S) align 1 %agg.tmp, ptr noundef %s)
+ ret i32 %call
+}
+
+define internal i32 @bar() {
+; CHECK-LABEL: bar(
+; CHECK: // @bar
+; CHECK-NEXT: {
+; CHECK-NEXT: .local .align 8 .b8 __local_depot1[16];
+; CHECK-NEXT: .reg .b64 %SP;
+; CHECK-NEXT: .reg .b64 %SPL;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: mov.u64 %SPL, __local_depot1;
+; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT: ld.global.u64 %rd1, [ptr];
+; CHECK-NEXT: ld.u64 %rd2, [%SP+8];
+; CHECK-NEXT: add.u64 %rd3, %SP, 0;
+; CHECK-NEXT: { // callseq 1, 0
+; CHECK-NEXT: .param .align 8 .b8 param0[8];
+; CHECK-NEXT: st.param.b64 [param0+0], %rd2;
+; CHECK-NEXT: .param .b64 param1;
+; CHECK-NEXT: st.param.b64 [param1+0], %rd3;
+; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd1,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_1;
+; CHECK-NEXT: ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT: } // callseq 1
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
+entry:
+ %s = alloca %struct.U, align 8
+ %agg.tmp = alloca %struct.U, align 8
+ %0 = load ptr, ptr @ptr, align 8
+ %call = call noundef i32 %0(ptr byval(%struct.U) align 8 %agg.tmp, ptr %s)
+ ret i32 %call
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/100174
More information about the llvm-branch-commits
mailing list