[llvm] [NVPTX] Improve device function byval parameter lowering (PR #129188)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 21:29:56 PST 2025
================
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i32 @test_ld_param_const(ptr byval(i32) %a) {
+; CHECK-LABEL: test_ld_param_const(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [test_ld_param_const_param_0+4];
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %p2 = getelementptr i32, ptr %a, i32 1
+ %ld = load i32, ptr %p2
+ ret i32 %ld
+}
+
+define i32 @test_ld_param_non_const(ptr byval([10 x i32]) %a, i32 %b) {
+; CHECK-LABEL: test_ld_param_non_const(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.b64 %rd1, test_ld_param_non_const_param_0;
+; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-NEXT: cvta.to.local.u64 %rd3, %rd2;
----------------
AlexMaclean wrote:
Note: once https://github.com/llvm/llvm-project/pull/129157 lands, this will be cleaned up (i.e. the `cvta.local` immediately followed by `cvta.to.local` in the CHECK lines above).
https://github.com/llvm/llvm-project/pull/129188
More information about the llvm-commits mailing list