[llvm-branch-commits] [llvm] release/22.x: x86: fix musttail sibcall miscompilation (#168956) (PR #176470)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 16 12:50:59 PST 2026
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: None (llvmbot)
Backport 782bf6aff6ba6e9617bd3c4e27b3b9220ed5c850
Requested by: @rnk
---
Patch is 33.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176470.diff
9 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+16)
- (modified) llvm/lib/Target/X86/X86ISelLoweringCall.cpp (+156-36)
- (modified) llvm/test/CodeGen/X86/hipe-cc64.ll (+1-2)
- (added) llvm/test/CodeGen/X86/musttail-struct.ll (+320)
- (modified) llvm/test/CodeGen/X86/musttail-tailcc.ll (-18)
- (modified) llvm/test/CodeGen/X86/sibcall.ll (+7-2)
- (modified) llvm/test/CodeGen/X86/swifttailcc-store-ret-address-aliasing-stack-slot.ll (+2-4)
- (modified) llvm/test/CodeGen/X86/tailcallbyval64.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/tailccbyval64.ll (+1-2)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7c8135d3a2013..a31ac8191ee40 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1072,6 +1072,19 @@ namespace llvm {
//===--------------------------------------------------------------------===//
// X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
+ // Copying needed for an outgoing byval argument.
+ enum ByValCopyKind {
+ // Argument is already in the correct location, no copy needed.
+ NoCopy,
+ // Argument value is currently in the local stack frame; it needs copying
+ // to the outgoing argument area.
+ CopyOnce,
+ // Argument value is currently in the outgoing argument area, but not at
+ // the correct offset, so needs copying via a temporary in local stack
+ // space.
+ CopyViaTemp,
+ };
+
public:
explicit X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI);
@@ -1777,6 +1790,9 @@ namespace llvm {
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ ByValCopyKind ByValNeedsCopyForTailCall(SelectionDAG &DAG, SDValue Src,
+ SDValue Dst,
+ ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 80299a639d3a3..7e1c894655f3f 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -2009,6 +2009,49 @@ SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
+// Returns the kind of copying required to set up a byval argument to a
+// tail-called function. This isn't needed for non-tail calls, because they
+// always need the equivalent of CopyOnce, but tail calls sometimes need two
+// copies to avoid clobbering another argument (CopyViaTemp), and can
+// sometimes be optimised to zero copies when forwarding an argument from
+// the caller's caller (NoCopy).
+X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
+ SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ // Globals are always safe to copy from.
+ if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
+ return CopyOnce;
+
+ // We can only analyse frame index nodes; for anything else, conservatively
+ // assume we need a temporary.
+ auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
+ auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
+ if (!SrcFrameIdxNode || !DstFrameIdxNode)
+ return CopyViaTemp;
+
+ int SrcFI = SrcFrameIdxNode->getIndex();
+ int DstFI = DstFrameIdxNode->getIndex();
+ assert(MFI.isFixedObjectIndex(DstFI) &&
+ "byval passed in non-fixed stack slot");
+
+ int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
+ int64_t DstOffset = MFI.getObjectOffset(DstFI);
+
+ // If the source is in the local frame, then the copy to the argument
+ // memory is always valid.
+ bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
+ if (!FixedSrc || SrcOffset < 0)
+ return CopyOnce;
+
+ // If the value is already in the correct location, then no copying is
+ // needed. If not, then we need to copy via a temporary.
+ if (SrcOffset == DstOffset)
+ return NoCopy;
+ return CopyViaTemp;
+}
+
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2026,11 +2069,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget.is64Bit();
- bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
- bool IsSibcall = false;
- bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
- CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
- bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
+ bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
+ bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
+ CallConv, MF.getTarget().Options.GuaranteedTailCallOpt);
+ bool IsCalleePopSRet =
+ !ShouldGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
bool HasNCSR = (CB && isa<CallInst>(CB) &&
CB->hasFnAttr("no_caller_saved_registers"));
@@ -2077,7 +2120,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
- if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
+ if (Subtarget.isPICStyleGOT() && !ShouldGuaranteeTCO && !IsMustTail) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
// relocation, which forces early binding of the symbol. This breaks code
@@ -2089,15 +2132,20 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
}
- if (isTailCall && !IsMustTail) {
- // Check if it's really possible to do a tail call.
- isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
- IsCalleePopSRet);
-
- // Sibcalls are automatically detected tailcalls which do not require
- // ABI changes.
- if (!IsGuaranteeTCO && isTailCall)
- IsSibcall = true;
+ // Check if this tail call is a "sibling" call, which is loosely defined to
+ // be a tail call that doesn't require heroics like moving the return address
+ // or swapping byval arguments.
+ bool IsSibcall = false;
+ if (isTailCall) {
+ // We believe that this should be a tail call; now check whether it is
+ // really possible.
+ IsSibcall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
+ IsCalleePopSRet);
+
+ if (!IsMustTail) {
+ isTailCall = IsSibcall;
+ IsSibcall = IsSibcall && !ShouldGuaranteeTCO;
+ }
if (isTailCall)
++NumTailCalls;
@@ -2116,13 +2164,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
+ else if (ShouldGuaranteeTCO && canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+ // A sibcall is ABI-compatible and does not need to adjust the stack pointer.
int FPDiff = 0;
- if (isTailCall &&
- shouldGuaranteeTCO(CallConv,
- MF.getTarget().Options.GuaranteedTailCallOpt)) {
+ if (isTailCall && ShouldGuaranteeTCO && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
@@ -2137,6 +2184,80 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned NumBytesToPush = NumBytes;
unsigned NumBytesToPop = NumBytes;
+ SDValue StackPtr;
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ // If we are doing a tail-call, any byval arguments will be written to stack
+ // space which was used for incoming arguments. If any of the values being used
+ // are incoming byval arguments to this function, then they might be
+ // overwritten by the stores of the outgoing arguments. To avoid this, we
+ // need to make a temporary copy of them in local stack space, then copy back
+ // to the argument area.
+ // FIXME: There's potential to improve the code by using virtual registers for
+ // temporary storage, and letting the register allocator spill if needed.
+ SmallVector<SDValue, 8> ByValTemporaries;
+ SDValue ByValTempChain;
+ if (isTailCall) {
+ // Use null SDValue to mean "no temporary recorded for this arg index".
+ ByValTemporaries.assign(OutVals.size(), SDValue());
+
+ SmallVector<SDValue, 8> ByValCopyChains;
+ for (const CCValAssign &VA : ArgLocs) {
+ unsigned ArgIdx = VA.getValNo();
+ SDValue Src = OutVals[ArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
+
+ if (!Flags.isByVal())
+ continue;
+
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ if (!StackPtr.getNode())
+ StackPtr =
+ DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT);
+
+ // Destination: where this byval should live in the callee's frame
+ // after the tail call.
+ int64_t Offset = VA.getLocMemOffset() + FPDiff;
+ uint64_t Size = VA.getLocVT().getFixedSizeInBits() / 8;
+ int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset,
+ /*IsImmutable=*/true);
+ SDValue Dst = DAG.getFrameIndex(FI, PtrVT);
+
+ ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
+
+ if (Copy == NoCopy) {
+ // If the argument is already at the correct offset on the stack
+ // (because we are forwarding a byval argument from our caller), we
+ // don't need any copying.
+ continue;
+ } else if (Copy == CopyOnce) {
+ // If the argument is in our local stack frame, no other argument
+ // preparation can clobber it, so we can copy it to the final location
+ // later.
+ ByValTemporaries[ArgIdx] = Src;
+ } else {
+ assert(Copy == CopyViaTemp && "unexpected enum value");
+ // If we might be copying this argument from the outgoing argument
+ // stack area, we need to copy via a temporary in the local stack
+ // frame.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(),
+ Flags.getNonZeroByValAlign(),
+ /*isSS=*/false);
+ SDValue Temp =
+ DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
+
+ SDValue CopyChain =
+ CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
+ ByValCopyChains.push_back(CopyChain);
+ }
+ }
+ if (!ByValCopyChains.empty())
+ ByValTempChain =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
+ }
+
// If we have an inalloca argument, all stack space has already been allocated
// for us and is right at the top of the stack. We don't support multiple
// arguments passed in memory when using inalloca.
@@ -2177,7 +2298,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- SDValue StackPtr;
// The next loop assumes that the locations are in the same order of the
// input arguments.
@@ -2186,7 +2306,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
- const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
@@ -2276,7 +2395,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
- } else if (!IsSibcall && (!isTailCall || isByVal)) {
+ } else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) {
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
@@ -2353,7 +2472,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// For tail calls, lower the arguments to the 'real' stack slots. Sibcalls
// don't need this because the eligibility check rejects calls that require
// shuffling arguments passed in memory.
- if (!IsSibcall && isTailCall) {
+ if (isTailCall && !IsSibcall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments or the return address are stored to the
// stack, because the outgoing stack slots may alias the incoming argument
@@ -2363,6 +2482,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// would clobber.
Chain = DAG.getStackArgumentTokenFactor(Chain);
+ if (ByValTempChain)
+ Chain =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);
+
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
@@ -2395,16 +2518,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
if (Flags.isByVal()) {
- // Copy relative to framepointer.
- SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
- if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- getPointerTy(DAG.getDataLayout()));
- Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
- StackPtr, Source);
-
- MemOpChains2.push_back(
- CreateCopyOfByValArgument(Source, FIN, Chain, Flags, DAG, dl));
+ if (SDValue ByValSrc = ByValTemporaries[OutsIndex]) {
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(
+ ByValSrc, DstAddr, Chain, Flags, DAG, dl));
+ }
} else {
// Store relative to framepointer.
MemOpChains2.push_back(DAG.getStore(
@@ -2837,8 +2957,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
- bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
- CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
+ bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
+ CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt);
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
@@ -2846,7 +2966,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (IsCalleeWin64 != IsCallerWin64)
return false;
- if (IsGuaranteeTCO) {
+ if (ShouldGuaranteeTCO) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
diff --git a/llvm/test/CodeGen/X86/hipe-cc64.ll b/llvm/test/CodeGen/X86/hipe-cc64.ll
index d8505641cd789..4cb033b1a6580 100644
--- a/llvm/test/CodeGen/X86/hipe-cc64.ll
+++ b/llvm/test/CodeGen/X86/hipe-cc64.ll
@@ -21,14 +21,13 @@ define void @zap(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: movl $2, %ecx
; CHECK-NEXT: movl $3, %r8d
; CHECK-NEXT: movq %rax, %r9
-; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: retq
+; CHECK-NEXT: jmp foo@PLT # TAILCALL
entry:
%0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
%res = extractvalue {i64, i64, i64} %0, 2
diff --git a/llvm/test/CodeGen/X86/musttail-struct.ll b/llvm/test/CodeGen/X86/musttail-struct.ll
new file mode 100644
index 0000000000000..735fd674a2ff1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/musttail-struct.ll
@@ -0,0 +1,320 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -x86-asm-syntax=intel | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -x86-asm-syntax=intel | FileCheck %s --check-prefix=X64
+
+; Test correct handling of a musttail call with a byval struct argument.
+
+%struct.1xi32 = type { [1 x i32] }
+%struct.3xi32 = type { [3 x i32] }
+%struct.5xi32 = type { [5 x i32] }
+
+declare dso_local i32 @Func1(ptr byval(%struct.1xi32) %0)
+declare dso_local i32 @Func3(ptr byval(%struct.3xi32) %0)
+declare dso_local i32 @Func5(ptr byval(%struct.5xi32) %0)
+declare dso_local i32 @FuncManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
+
+define dso_local i32 @test1(ptr byval(%struct.1xi32) %0) {
+; X32-LABEL: test1:
+; X32: # %bb.0:
+; X32-NEXT: jmp Func1 # TAILCALL
+;
+; X64-LABEL: test1:
+; X64: # %bb.0:
+; X64-NEXT: jmp Func1 # TAILCALL
+ %r = musttail call i32 @Func1(ptr byval(%struct.1xi32) %0)
+ ret i32 %r
+}
+
+define dso_local i32 @test3(ptr byval(%struct.3xi32) %0) {
+; X32-LABEL: test3:
+; X32: # %bb.0:
+; X32-NEXT: jmp Func3 # TAILCALL
+;
+; X64-LABEL: test3:
+; X64: # %bb.0:
+; X64-NEXT: jmp Func3 # TAILCALL
+ %r = musttail call i32 @Func3(ptr byval(%struct.3xi32) %0)
+ ret i32 %r
+}
+
+; sizeof(%struct.5xi32) > 16, so on x64 this is passed on the stack.
+define dso_local i32 @test5(ptr byval(%struct.5xi32) %0) {
+; X32-LABEL: test5:
+; X32: # %bb.0:
+; X32-NEXT: jmp Func5 # TAILCALL
+;
+; X64-LABEL: test5:
+; X64: # %bb.0:
+; X64-NEXT: jmp Func5 # TAILCALL
+ %r = musttail call i32 @Func5(ptr byval(%struct.5xi32) %0)
+ ret i32 %r
+}
+
+; Test passing multiple arguments with different sizes on the stack. On x64
+; Linux the first 6 are passed in registers.
+define dso_local i32 @testManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
+; X32-LABEL: testManyArgs:
+; X32: # %bb.0:
+; X32-NEXT: jmp FuncManyArgs # TAILCALL
+;
+; X64-LABEL: testManyArgs:
+; X64: # %bb.0:
+; X64-NEXT: jmp FuncManyArgs # TAILCALL
+ %r = musttail call i32 @FuncManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
+ ret i32 %r
+}
+
+define dso_local i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
+; X32-LABEL: testRecursion:
+; X32: # %bb.0:
+; X32-NEXT: jmp testRecursion # TAILCALL
+;
+; X64-LABEL: testRecursion:
+; X64: # %bb.0:
+; X64-NEXT: jmp testRecursion # TAILCALL
+ %r = musttail call i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
+ ret i32 %r
+}
+
+define dso_local i32 @swap(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) noinline {
+; X32-LABEL: swap:
+; X32: # %bb.0: # %entry
+; X32-NEXT: mov eax, dword ptr [esp + 4]
+; X32-NEXT: add eax, dword ptr [esp + 8]
+; X32-NEXT: ret
+;
+; X64-LABEL: swap:
+; X64: # %bb.0: # %entry
+; X64-NEXT: mov eax, dword ptr [rsp + 8]
+; X64-NEXT: add eax, dword ptr [rsp + 16]
+; X64-NEXT: ret
+entry:
+ %a.ptr = getelementptr inbounds %struct.1xi32, ptr %0, i32 0, i32 0, i32 0
+ %a = load i32, ptr %a.ptr, align 4
+ %b.ptr = getelementptr inbounds %struct.1xi32, ptr %1, i32 0, i32 0, i32 0
+ %b = load i32, ptr %b.ptr, align 4
+ %sum = add i32 %a, %b
+ ret i32 %sum
+}
+
+define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) {
+; X32-LABEL: swapByValArguments:
+; X32: # %bb.0:
+; X32-NEXT: sub esp, 8
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: mov eax, dword ptr [esp + 12]
+; X32-NEXT: mov dword ptr [esp], eax
+; X32-NEXT: mov eax, dword ptr [esp + 16]
+; X32-NEXT: mov d...
[truncated]
``````````
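For context on the hazard the new CopyViaTemp path guards against, a minimal LLVM IR sketch is shown below. It is a hypothetical reduction in the spirit of the truncated swapByValArguments test above, not copied from it (the function name @forward_swapped is invented for illustration): the musttail call forwards the function's two incoming byval arguments in swapped order, so each outgoing byval store targets the fixed incoming stack slot that the other argument still occupies.

```llvm
; Hypothetical reduced reproducer: %a and %b arrive in fixed incoming stack
; slots, and the musttail call writes them back to those same slots in
; swapped order. Copying either argument straight into its destination would
; clobber the source of the other copy, so the lowering classifies both as
; CopyViaTemp and stages them through temporaries in the local frame.
%struct.1xi32 = type { [1 x i32] }

define i32 @forward_swapped(ptr byval(%struct.1xi32) %a,
                            ptr byval(%struct.1xi32) %b) {
  %r = musttail call i32 @forward_swapped(ptr byval(%struct.1xi32) %b,
                                          ptr byval(%struct.1xi32) %a)
  ret i32 %r
}
```

Feeding IR of this shape to llc (e.g. with -mtriple=i686-unknown-unknown, as in the RUN lines above) exercises the ByValNeedsCopyForTailCall classification added by this patch.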
https://github.com/llvm/llvm-project/pull/176470
More information about the llvm-branch-commits
mailing list