[llvm] [X86] Fix missing ByValTemporaries update in CopyViaTemp path for musttail calls (PR #190540)

Zile Xiong via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 5 10:32:15 PDT 2026


https://github.com/xiongzile updated https://github.com/llvm/llvm-project/pull/190540

From b0f1215b6532735b381e829d21255cdac17ff452 Mon Sep 17 00:00:00 2001
From: Zile Xiong <xiongzile99 at gmail.com>
Date: Mon, 6 Apr 2026 01:13:20 +0800
Subject: [PATCH] [X86] Fix missing ByValTemporaries update in CopyViaTemp path
 for musttail calls

The CopyViaTemp path creates a temporary for byval arguments but does not
record it in ByValTemporaries. As a result, the final lowering phase does
not emit the copy to the outgoing argument slot, leading to incorrect
values being observed by the callee.

Record the temporary in ByValTemporaries to ensure correct lowering.

Fixes: https://github.com/llvm/llvm-project/issues/190429
---
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp   |  1 +
 .../X86/musttail-byval-copyvia-temp.ll        | 90 +++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/musttail-byval-copyvia-temp.ll

diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 37c80e27f4bd2d..65d77769b3c456 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -2237,6 +2237,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         SDValue CopyChain =
             CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
         ByValCopyChains.push_back(CopyChain);
+        ByValTemporaries[ArgIdx] = Temp;
       }
     }
     if (!ByValCopyChains.empty())
diff --git a/llvm/test/CodeGen/X86/musttail-byval-copyvia-temp.ll b/llvm/test/CodeGen/X86/musttail-byval-copyvia-temp.ll
new file mode 100644
index 00000000000000..ed21fea857fc94
--- /dev/null
+++ b/llvm/test/CodeGen/X86/musttail-byval-copyvia-temp.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; ModuleID = '2.c'
+source_filename = "2.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.quad = type { [4 x i64] }
+
+@quads = dso_local local_unnamed_addr global [2 x %struct.quad] [%struct.quad { [4 x i64] [i64 10, i64 10, i64 10, i64 10] }, %struct.quad { [4 x i64] [i64 20, i64 20, i64 20, i64 20] }], align 16
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable
+define dso_local i64 @callee(i64 %0, ptr noundef readonly byval(%struct.quad) align 8 captures(none) %1) local_unnamed_addr #0 {
+; CHECK-LABEL: callee:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    retq
+  %3 = getelementptr inbounds nuw i8, ptr %1, i64 8
+  %4 = load i64, ptr %3, align 8, !tbaa !8
+  ret i64 %4
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: none, inaccessiblemem: none, target_mem: none) uwtable
+define dso_local i64 @bad(i64 noundef %0, ptr noundef readnone byval(%struct.quad) align 8 captures(none) %1) local_unnamed_addr #1 {
+; CHECK-LABEL: bad:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shlq $5, %rdi
+; CHECK-NEXT:    movups quads+16(%rdi), %xmm0
+; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movups quads(%rdi), %xmm1
+; CHECK-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jmp callee # TAILCALL
+  %3 = getelementptr inbounds nuw [32 x i8], ptr @quads, i64 %0
+  %4 = musttail call i64 @callee(i64 poison, ptr noundef nonnull byval(%struct.quad) align 8 %3)
+  ret i64 %4
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read, inaccessiblemem: none, target_mem: none) uwtable
+define dso_local i32 @main() local_unnamed_addr #2 {
+; CHECK-LABEL: main:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    movups {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT:    movups {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    movups %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movups %xmm0, (%rsp)
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    callq bad
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %1 = alloca %struct.quad, align 8
+  call void @llvm.lifetime.start.p0(ptr nonnull %1) #4
+  %2 = tail call i64 @bad(i64 noundef 1, ptr noundef nonnull byval(%struct.quad) align 8 %1)
+  %3 = trunc i64 %2 to i32
+  call void @llvm.lifetime.end.p0(ptr nonnull %1) #4
+  ret i32 %3
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(ptr captures(none)) #3
+
+attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: none, inaccessiblemem: none, target_mem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { mustprogress nofree norecurse nosync nounwind willreturn memory(read, inaccessiblemem: none, target_mem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+!llvm.errno.tbaa = !{!4}
+
+!0 = !{i32 8, !"PIC Level", i32 2}
+!1 = !{i32 7, !"PIE Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 2}
+!3 = !{!"clang version 23.0.0git (git@github.com:xiongzile/llvm-project.git 5ecff6e430f71d951b0f75705bc8f01b2da14005)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long", !6, i64 0}



More information about the llvm-commits mailing list