[llvm] [AArch64][GlobalISel] Implement selectVaStartAAPCS (PR #106979)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 01:13:34 PDT 2024
https://github.com/Him188 updated https://github.com/llvm/llvm-project/pull/106979
>From 350019e8b178394de03ff0cab38375ba806c9dc0 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 19 Aug 2024 17:59:36 +0100
Subject: [PATCH 1/3] [AArch64][GlobalISel] Implement selectVaStartAAPCS
This commit adds the missing support for varargs in the instruction selection pass for AAPCS. Previously we only implemented this for Darwin.
The implementation follows the AAPCS and mirrors SelectionDAG's LowerAAPCS_VASTART.
It resolves all VA_START fallbacks in RAJAperf, llvm-test-suite, and SPEC CPU2017. These benchmarks now compile and pass without any fallbacks caused by varargs.
---
.../GISel/AArch64InstructionSelector.cpp | 103 +++-
.../CodeGen/AArch64/GlobalISel/vararg.mir | 325 ++++++++++
llvm/test/CodeGen/AArch64/vararg.ll | 569 ++++++++++++++++++
3 files changed, 996 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
create mode 100644 llvm/test/CodeGen/AArch64/vararg.ll
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index e9e6b6cb68d0d1..29d1392b51090c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1994,7 +1994,108 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- return false;
+
+ if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
+ MF.getFunction().getCallingConv(), MF.getFunction().isVarArg()))
+ return false;
+
+ // The layout of the va_list struct is specified in the AArch64 Procedure Call
+ // Standard, section 10.1.5.
+
+ const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
+ const auto *PtrRegClass =
+ STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+
+ const MCInstrDesc &MCIDAddAddr =
+ TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
+ const MCInstrDesc &MCIDStoreAddr =
+ TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
+
+ /*
+ * typedef struct va_list {
+ * void * stack; // next stack param
+ * void * gr_top; // end of GP arg reg save area
+ * void * vr_top; // end of FP/SIMD arg reg save area
+ * int gr_offs; // offset from gr_top to next GP register arg
+ * int vr_offs; // offset from vr_top to next FP/SIMD register arg
+ * } va_list;
+ */
+ const auto VAList = I.getOperand(0).getReg();
+
+ // Our current offset in bytes from the va_list struct (VAList).
+ unsigned OffsetBytes = 0;
+
+ // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
+ // and increment OffsetBytes by PtrSize.
+ const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
+ const Register Top = MRI.createVirtualRegister(PtrRegClass);
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
+ .addDef(Top)
+ .addFrameIndex(FrameIndex)
+ .addImm(Imm)
+ .addImm(0);
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
+ .addUse(Top)
+ .addUse(VAList)
+ .addImm(OffsetBytes / PtrSize)
+ .addMemOperand(MF.getMachineMemOperand(
+ (*I.memoperands_begin())
+ ->getPointerInfo()
+ .getWithOffset(OffsetBytes),
+ MachineMemOperand::MOStore, PtrSize, Align(PtrSize)));
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ OffsetBytes += PtrSize;
+ };
+
+ // void* stack at offset 0
+ PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
+
+ // void* gr_top at offset 8 (4 on ILP32)
+ const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
+ PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
+
+ // void* vr_top at offset 16 (8 on ILP32)
+ const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
+ PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
+
+ // Helper function to store a 4-byte integer constant to VAList at offset
+ // OffsetBytes, and increment OffsetBytes by 4.
+ const auto PushIntConstant = [&](const int32_t Value) {
+ constexpr int IntSize = 4;
+ const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ auto MIB =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
+ .addDef(Temp)
+ .addImm(Value);
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
+ .addUse(Temp)
+ .addUse(VAList)
+ .addImm(OffsetBytes / IntSize)
+ .addMemOperand(MF.getMachineMemOperand(
+ (*I.memoperands_begin())
+ ->getPointerInfo()
+ .getWithOffset(OffsetBytes),
+ MachineMemOperand::MOStore, IntSize, Align(IntSize)));
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ OffsetBytes += IntSize;
+ };
+
+ // int gr_offs at offset 24 (12 on ILP32)
+ PushIntConstant(-static_cast<int32_t>(GPRSize));
+
+ // int vr_offs at offset 28 (16 on ILP32)
+ PushIntConstant(-static_cast<int32_t>(FPRSize));
+
+ assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
+
+ I.eraseFromParent();
+ return true;
}
bool AArch64InstructionSelector::selectVaStartDarwin(
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
new file mode 100644
index 00000000000000..50911582eef0bc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
@@ -0,0 +1,325 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64-unknown-linux -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0
+
+ declare void @llvm.va_start.p0(ptr) #1
+
+ declare void @llvm.va_end.p0(ptr) #1
+
+ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
+
+ define float @vararg(ptr %a, ...) #2 {
+ entry:
+ %ap = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap)
+ call void @llvm.va_start.p0(ptr nonnull %ap)
+ %vr_offs_p = getelementptr inbounds i8, ptr %ap, i64 28
+ %vr_offs = load i32, ptr %vr_offs_p, align 4
+ %0 = sext i32 %vr_offs to i64
+ %1 = icmp sgt i32 %vr_offs, -1
+ br i1 %1, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+ vaarg.maybe_reg: ; preds = %entry
+ %2 = trunc i64 %0 to i32
+ %3 = trunc i64 %0 to i32
+ %new_reg_offs = add nsw i32 %2, 16
+ %sunkaddr = getelementptr inbounds i8, ptr %ap, i64 28
+ store i32 %new_reg_offs, ptr %sunkaddr, align 4
+ %inreg = icmp ult i32 %3, -15
+ br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+ vaarg.in_reg: ; preds = %vaarg.maybe_reg
+ %reg_top_p = getelementptr inbounds i8, ptr %ap, i64 16
+ %reg_top = load ptr, ptr %reg_top_p, align 8
+ %4 = getelementptr inbounds i8, ptr %reg_top, i64 %0
+ br label %vaarg.end
+
+ vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
+ %stack = load ptr, ptr %ap, align 8
+ %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+ store ptr %new_stack, ptr %ap, align 8
+ br label %vaarg.end
+
+ vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
+ %vaargs.addr = phi ptr [ %4, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+ %5 = load double, ptr %vaargs.addr, align 8
+ %conv = fptrunc double %5 to float
+ call void @llvm.va_end.p0(ptr nonnull %ap)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
+ ret float %conv
+ }
+
+ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) "target-cpu"="neoverse-v2" }
+ attributes #1 = { nocallback nofree nosync nounwind willreturn "target-cpu"="neoverse-v2" }
+ attributes #2 = { uwtable "frame-pointer"="all" "target-cpu"="neoverse-v2" }
+
+...
+---
+name: vararg
+alignment: 16
+exposesReturnsTwice: false
+legalized: true
+regBankSelected: true
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+liveins:
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 16
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+ - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+ isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 56, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 0, size: 128, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: ap, type: default, offset: 0, size: 32, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: vararg
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000)
+ ; CHECK-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: STRXui [[COPY]], %stack.0, 0 :: (store (s64) into stack + 8, align 1)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: STRXui [[COPY1]], %stack.0, 1 :: (store (s64) into stack + 16, align 1)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: STRXui [[COPY2]], %stack.0, 2 :: (store (s64) into stack + 24, align 1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+ ; CHECK-NEXT: STRXui [[COPY3]], %stack.0, 3 :: (store (s64) into stack + 32, align 1)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+ ; CHECK-NEXT: STRXui [[COPY4]], %stack.0, 4 :: (store (s64) into stack + 40, align 1)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+ ; CHECK-NEXT: STRXui [[COPY5]], %stack.0, 5 :: (store (s64) into stack + 48, align 1)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64 = COPY $x7
+ ; CHECK-NEXT: STRXui [[COPY6]], %stack.0, 6 :: (store (s64) into stack + 56, align 1)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: STRQui [[COPY7]], %stack.1, 0 :: (store (s128) into stack, align 1)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:fpr128 = COPY $q1
+ ; CHECK-NEXT: STRQui [[COPY8]], %stack.1, 1 :: (store (s128) into stack + 16, align 1)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:fpr128 = COPY $q2
+ ; CHECK-NEXT: STRQui [[COPY9]], %stack.1, 2 :: (store (s128) into stack + 32, align 1)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:fpr128 = COPY $q3
+ ; CHECK-NEXT: STRQui [[COPY10]], %stack.1, 3 :: (store (s128) into stack + 48, align 1)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:fpr128 = COPY $q4
+ ; CHECK-NEXT: STRQui [[COPY11]], %stack.1, 4 :: (store (s128) into stack + 64, align 1)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:fpr128 = COPY $q5
+ ; CHECK-NEXT: STRQui [[COPY12]], %stack.1, 5 :: (store (s128) into stack + 80, align 1)
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:fpr128 = COPY $q6
+ ; CHECK-NEXT: STRQui [[COPY13]], %stack.1, 6 :: (store (s128) into stack + 96, align 1)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:fpr128 = COPY $q7
+ ; CHECK-NEXT: STRQui [[COPY14]], %stack.1, 7 :: (store (s128) into stack + 112, align 1)
+ ; CHECK-NEXT: LIFETIME_START %stack.2.ap
+ ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.2.ap, 0, 0
+ ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+ ; CHECK-NEXT: STRXui [[ADDXri1]], [[ADDXri]], 0 :: (store (s64) into %ir.ap)
+ ; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+ ; CHECK-NEXT: STRXui [[ADDXri2]], [[ADDXri]], 1 :: (store (s64) into %ir.ap + 8)
+ ; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+ ; CHECK-NEXT: STRXui [[ADDXri3]], [[ADDXri]], 2 :: (store (s64) into %ir.ap + 16)
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+ ; CHECK-NEXT: STRWui [[MOVi32imm]], [[ADDXri]], 6 :: (store (s32) into %ir.ap + 24)
+ ; CHECK-NEXT: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 0
+ ; CHECK-NEXT: STRWui [[MOVi32imm1]], [[ADDXri]], 7 :: (store (s32) into %ir.ap + 28)
+ ; CHECK-NEXT: [[LDRSWui:%[0-9]+]]:gpr64common = LDRSWui %stack.2.ap, 7 :: (dereferenceable load (s32) from %ir.vr_offs_p)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:gpr32common = COPY [[LDRSWui]].sub_32
+ ; CHECK-NEXT: TBZW [[COPY15]], 31, %bb.3
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.vaarg.maybe_reg:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:gpr32sp = COPY [[LDRSWui]].sub_32
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = nsw ADDWri [[COPY16]], 16, 0
+ ; CHECK-NEXT: STRWui [[ADDWri]], %stack.2.ap, 7 :: (store (s32) into %ir.sunkaddr)
+ ; CHECK-NEXT: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri [[COPY16]], 15, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 2, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.vaarg.in_reg:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.2.ap, 2 :: (dereferenceable load (p0) from %ir.reg_top_p)
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRSWui]]
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.vaarg.on_stack:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui %stack.2.ap, 0 :: (dereferenceable load (p0) from %ir.ap)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:gpr64common = COPY [[LDRXui1]]
+ ; CHECK-NEXT: [[ADDXri4:%[0-9]+]]:gpr64sp = ADDXri [[COPY17]], 8, 0
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:gpr64 = COPY [[ADDXri4]]
+ ; CHECK-NEXT: STRXui [[COPY18]], %stack.2.ap, 0 :: (store (p0) into %ir.ap)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.vaarg.end:
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr64sp = PHI [[ADDXrr]], %bb.2, [[LDRXui1]], %bb.3
+ ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[PHI]], 0 :: (load (s64) from %ir.vaargs.addr)
+ ; CHECK-NEXT: [[FCVTSDr:%[0-9]+]]:fpr32 = nofpexcept FCVTSDr [[LDRDui]], implicit $fpcr
+ ; CHECK-NEXT: LIFETIME_END %stack.2.ap
+ ; CHECK-NEXT: $s0 = COPY [[FCVTSDr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
+ bb.1.entry:
+ successors: %bb.4(0x50000000), %bb.2(0x30000000)
+ liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+
+ %3:gpr(s64) = COPY $x1
+ %1:gpr(p0) = G_FRAME_INDEX %stack.0
+ G_STORE %3(s64), %1(p0) :: (store (s64) into stack + 8, align 1)
+ %5:gpr(s64) = COPY $x2
+ %2:gpr(s64) = G_CONSTANT i64 8
+ %4:gpr(p0) = G_PTR_ADD %1, %2(s64)
+ G_STORE %5(s64), %4(p0) :: (store (s64) into stack + 16, align 1)
+ %7:gpr(s64) = COPY $x3
+ %18:gpr(s64) = G_CONSTANT i64 16
+ %6:gpr(p0) = G_PTR_ADD %1, %18(s64)
+ G_STORE %7(s64), %6(p0) :: (store (s64) into stack + 24, align 1)
+ %9:gpr(s64) = COPY $x4
+ %61:gpr(s64) = G_CONSTANT i64 24
+ %8:gpr(p0) = G_PTR_ADD %1, %61(s64)
+ G_STORE %9(s64), %8(p0) :: (store (s64) into stack + 32, align 1)
+ %11:gpr(s64) = COPY $x5
+ %62:gpr(s64) = G_CONSTANT i64 32
+ %10:gpr(p0) = G_PTR_ADD %1, %62(s64)
+ G_STORE %11(s64), %10(p0) :: (store (s64) into stack + 40, align 1)
+ %13:gpr(s64) = COPY $x6
+ %63:gpr(s64) = G_CONSTANT i64 40
+ %12:gpr(p0) = G_PTR_ADD %1, %63(s64)
+ G_STORE %13(s64), %12(p0) :: (store (s64) into stack + 48, align 1)
+ %15:gpr(s64) = COPY $x7
+ %64:gpr(s64) = G_CONSTANT i64 48
+ %14:gpr(p0) = G_PTR_ADD %1, %64(s64)
+ G_STORE %15(s64), %14(p0) :: (store (s64) into stack + 56, align 1)
+ %19:fpr(s128) = COPY $q0
+ %17:gpr(p0) = G_FRAME_INDEX %stack.1
+ G_STORE %19(s128), %17(p0) :: (store (s128) into stack, align 1)
+ %21:fpr(s128) = COPY $q1
+ %20:gpr(p0) = G_PTR_ADD %17, %18(s64)
+ G_STORE %21(s128), %20(p0) :: (store (s128) into stack + 16, align 1)
+ %23:fpr(s128) = COPY $q2
+ %22:gpr(p0) = G_PTR_ADD %17, %62(s64)
+ G_STORE %23(s128), %22(p0) :: (store (s128) into stack + 32, align 1)
+ %25:fpr(s128) = COPY $q3
+ %24:gpr(p0) = G_PTR_ADD %17, %64(s64)
+ G_STORE %25(s128), %24(p0) :: (store (s128) into stack + 48, align 1)
+ %27:fpr(s128) = COPY $q4
+ %65:gpr(s64) = G_CONSTANT i64 64
+ %26:gpr(p0) = G_PTR_ADD %17, %65(s64)
+ G_STORE %27(s128), %26(p0) :: (store (s128) into stack + 64, align 1)
+ %29:fpr(s128) = COPY $q5
+ %66:gpr(s64) = G_CONSTANT i64 80
+ %28:gpr(p0) = G_PTR_ADD %17, %66(s64)
+ G_STORE %29(s128), %28(p0) :: (store (s128) into stack + 80, align 1)
+ %31:fpr(s128) = COPY $q6
+ %67:gpr(s64) = G_CONSTANT i64 96
+ %30:gpr(p0) = G_PTR_ADD %17, %67(s64)
+ G_STORE %31(s128), %30(p0) :: (store (s128) into stack + 96, align 1)
+ %33:fpr(s128) = COPY $q7
+ %68:gpr(s64) = G_CONSTANT i64 112
+ %32:gpr(p0) = G_PTR_ADD %17, %68(s64)
+ G_STORE %33(s128), %32(p0) :: (store (s128) into stack + 112, align 1)
+ LIFETIME_START %stack.2.ap
+ %35:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ G_VASTART %35(p0) :: (store (s256) into %ir.ap, align 8)
+ %36:gpr(s64) = G_CONSTANT i64 28
+ %37:gpr(p0) = G_PTR_ADD %35, %36(s64)
+ %39:gpr(s64) = G_SEXTLOAD %37(p0) :: (dereferenceable load (s32) from %ir.vr_offs_p)
+ %69:gpr(s32) = G_TRUNC %39(s64)
+ %81:gpr(s32) = G_CONSTANT i32 0
+ %80:gpr(s32) = G_ICMP intpred(sge), %69(s32), %81
+ G_BRCOND %80(s32), %bb.4
+ G_BR %bb.2
+
+ bb.2.vaarg.maybe_reg:
+ successors: %bb.3(0x40000000), %bb.4(0x40000000)
+
+ %42:gpr(s32) = G_TRUNC %39(s64)
+ %76:gpr(s32) = G_CONSTANT i32 16
+ %45:gpr(s32) = nsw G_ADD %42, %76
+ %46:gpr(s64) = G_CONSTANT i64 28
+ %72:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ %47:gpr(p0) = G_PTR_ADD %72, %46(s64)
+ G_STORE %45(s32), %47(p0) :: (store (s32) into %ir.sunkaddr)
+ %75:gpr(s32) = G_CONSTANT i32 -15
+ %78:gpr(s32) = G_ICMP intpred(uge), %42(s32), %75
+ G_BRCOND %78(s32), %bb.4
+ G_BR %bb.3
+
+ bb.3.vaarg.in_reg:
+ successors: %bb.5(0x80000000)
+
+ %50:gpr(s64) = G_CONSTANT i64 16
+ %73:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ %51:gpr(p0) = G_PTR_ADD %73, %50(s64)
+ %52:gpr(p0) = G_LOAD %51(p0) :: (dereferenceable load (p0) from %ir.reg_top_p)
+ %53:gpr(p0) = G_PTR_ADD %52, %39(s64)
+ G_BR %bb.5
+
+ bb.4.vaarg.on_stack:
+ successors: %bb.5(0x80000000)
+
+ %74:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ %55:gpr(p0) = G_LOAD %74(p0) :: (dereferenceable load (p0) from %ir.ap)
+ %56:gpr(s64) = G_CONSTANT i64 8
+ %57:gpr(p0) = G_PTR_ADD %55, %56(s64)
+ G_STORE %57(p0), %74(p0) :: (store (p0) into %ir.ap)
+
+ bb.5.vaarg.end:
+ %58:gpr(p0) = G_PHI %53(p0), %bb.3, %55(p0), %bb.4
+ %59:fpr(s64) = G_LOAD %58(p0) :: (load (s64) from %ir.vaargs.addr)
+ %60:fpr(s32) = G_FPTRUNC %59(s64)
+ LIFETIME_END %stack.2.ap
+ $s0 = COPY %60(s32)
+ RET_ReallyLR implicit $s0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/vararg.ll b/llvm/test/CodeGen/AArch64/vararg.ll
new file mode 100644
index 00000000000000..fa9cdf4fc1bcaa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vararg.ll
@@ -0,0 +1,569 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O0 -global-isel=0 -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O0 -global-isel=1 -global-isel-abort=1 -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_end.p0(ptr)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+; To make the outputs more readable
+attributes #0 = { uwtable "frame-pointer"="all" }
+
+define i64 @vararg(...) #0 {
+; CHECK-SD-LABEL: vararg:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #224
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 224
+; CHECK-SD-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
+; CHECK-SD-NEXT: add x29, sp, #208
+; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset w29, -16
+; CHECK-SD-NEXT: str q7, [sp, #112]
+; CHECK-SD-NEXT: str q6, [sp, #96]
+; CHECK-SD-NEXT: str q5, [sp, #80]
+; CHECK-SD-NEXT: str q4, [sp, #64]
+; CHECK-SD-NEXT: str q3, [sp, #48]
+; CHECK-SD-NEXT: str q2, [sp, #32]
+; CHECK-SD-NEXT: str q1, [sp, #16]
+; CHECK-SD-NEXT: str q0, [sp]
+; CHECK-SD-NEXT: stur x7, [x29, #-16]
+; CHECK-SD-NEXT: stur x6, [x29, #-24]
+; CHECK-SD-NEXT: stur x5, [x29, #-32]
+; CHECK-SD-NEXT: stur x4, [x29, #-40]
+; CHECK-SD-NEXT: stur x3, [x29, #-48]
+; CHECK-SD-NEXT: stur x2, [x29, #-56]
+; CHECK-SD-NEXT: stur x1, [x29, #-64]
+; CHECK-SD-NEXT: stur x0, [x29, #-72]
+; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT: str w8, [x29, #20]
+; CHECK-SD-NEXT: mov w8, #-64 // =0xffffffc0
+; CHECK-SD-NEXT: str w8, [x29, #16]
+; CHECK-SD-NEXT: add x8, x29, #16
+; CHECK-SD-NEXT: stur x8, [x29, #-8]
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-NEXT: add x8, x8, #128
+; CHECK-SD-NEXT: str x8, [x29, #8]
+; CHECK-SD-NEXT: sub x8, x29, #72
+; CHECK-SD-NEXT: add x8, x8, #64
+; CHECK-SD-NEXT: str x8, [x29]
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: .cfi_def_cfa wsp, 224
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #224
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT: .cfi_restore w30
+; CHECK-SD-NEXT: .cfi_restore w29
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vararg:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #224
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 224
+; CHECK-GI-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
+; CHECK-GI-NEXT: add x29, sp, #208
+; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset w29, -16
+; CHECK-GI-NEXT: stur x0, [x29, #-64]
+; CHECK-GI-NEXT: stur x1, [x29, #-56]
+; CHECK-GI-NEXT: stur x2, [x29, #-48]
+; CHECK-GI-NEXT: stur x3, [x29, #-40]
+; CHECK-GI-NEXT: stur x4, [x29, #-32]
+; CHECK-GI-NEXT: stur x5, [x29, #-24]
+; CHECK-GI-NEXT: stur x6, [x29, #-16]
+; CHECK-GI-NEXT: stur x7, [x29, #-8]
+; CHECK-GI-NEXT: str q0, [sp, #16]
+; CHECK-GI-NEXT: str q1, [sp, #32]
+; CHECK-GI-NEXT: str q2, [sp, #48]
+; CHECK-GI-NEXT: str q3, [sp, #64]
+; CHECK-GI-NEXT: str q4, [sp, #80]
+; CHECK-GI-NEXT: str q5, [sp, #96]
+; CHECK-GI-NEXT: str q6, [sp, #112]
+; CHECK-GI-NEXT: str q7, [sp, #128]
+; CHECK-GI-NEXT: add x9, sp, #8
+; CHECK-GI-NEXT: add x8, x29, #16
+; CHECK-GI-NEXT: str x8, [x9]
+; CHECK-GI-NEXT: add x8, x29, #0
+; CHECK-GI-NEXT: str x8, [x9, #8]
+; CHECK-GI-NEXT: add x8, sp, #144
+; CHECK-GI-NEXT: str x8, [x9, #16]
+; CHECK-GI-NEXT: mov w8, #-64 // =0xffffffc0
+; CHECK-GI-NEXT: str w8, [x9, #24]
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: str w8, [x9, #28]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: .cfi_def_cfa wsp, 224
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #224
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT: .cfi_restore w30
+; CHECK-GI-NEXT: .cfi_restore w29
+; CHECK-GI-NEXT: ret
+entry:
+ %g = alloca ptr, align 4
+ call void @llvm.va_start(ptr %g)
+ ret i64 1
+}
+
+define i64 @vararg_many_gpr(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) #0 {
+; CHECK-SD-LABEL: vararg_many_gpr:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #160
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 160
+; CHECK-SD-NEXT: stp x29, x30, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT: add x29, sp, #144
+; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset w29, -16
+; CHECK-SD-NEXT: str q7, [sp, #112]
+; CHECK-SD-NEXT: str q6, [sp, #96]
+; CHECK-SD-NEXT: str q5, [sp, #80]
+; CHECK-SD-NEXT: str q4, [sp, #64]
+; CHECK-SD-NEXT: str q3, [sp, #48]
+; CHECK-SD-NEXT: str q2, [sp, #32]
+; CHECK-SD-NEXT: str q1, [sp, #16]
+; CHECK-SD-NEXT: str q0, [sp]
+; CHECK-SD-NEXT: stur x7, [x29, #-16]
+; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT: str w8, [x29, #20]
+; CHECK-SD-NEXT: mov w8, #-8 // =0xfffffff8
+; CHECK-SD-NEXT: str w8, [x29, #16]
+; CHECK-SD-NEXT: add x8, x29, #16
+; CHECK-SD-NEXT: stur x8, [x29, #-8]
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-NEXT: add x8, x8, #128
+; CHECK-SD-NEXT: str x8, [x29, #8]
+; CHECK-SD-NEXT: sub x8, x29, #16
+; CHECK-SD-NEXT: add x8, x8, #8
+; CHECK-SD-NEXT: str x8, [x29]
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: .cfi_def_cfa wsp, 160
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #160
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT: .cfi_restore w30
+; CHECK-SD-NEXT: .cfi_restore w29
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vararg_many_gpr:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #176
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 176
+; CHECK-GI-NEXT: stp x29, x30, [sp, #160] // 16-byte Folded Spill
+; CHECK-GI-NEXT: add x29, sp, #160
+; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset w29, -16
+; CHECK-GI-NEXT: stur x7, [x29, #-8]
+; CHECK-GI-NEXT: str q0, [sp, #16]
+; CHECK-GI-NEXT: str q1, [sp, #32]
+; CHECK-GI-NEXT: str q2, [sp, #48]
+; CHECK-GI-NEXT: str q3, [sp, #64]
+; CHECK-GI-NEXT: str q4, [sp, #80]
+; CHECK-GI-NEXT: str q5, [sp, #96]
+; CHECK-GI-NEXT: str q6, [sp, #112]
+; CHECK-GI-NEXT: str q7, [sp, #128]
+; CHECK-GI-NEXT: add x9, sp, #8
+; CHECK-GI-NEXT: add x8, x29, #16
+; CHECK-GI-NEXT: str x8, [x9]
+; CHECK-GI-NEXT: add x8, x29, #0
+; CHECK-GI-NEXT: str x8, [x9, #8]
+; CHECK-GI-NEXT: add x8, sp, #144
+; CHECK-GI-NEXT: str x8, [x9, #16]
+; CHECK-GI-NEXT: mov w8, #-8 // =0xfffffff8
+; CHECK-GI-NEXT: str w8, [x9, #24]
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: str w8, [x9, #28]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: .cfi_def_cfa wsp, 176
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #160] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #176
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT: .cfi_restore w30
+; CHECK-GI-NEXT: .cfi_restore w29
+; CHECK-GI-NEXT: ret
+entry:
+ %g = alloca ptr, align 4
+ call void @llvm.va_start(ptr %g)
+ ret i64 1
+}
+
+define i64 @vararg_many_float(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, ...) #0 {
+; CHECK-SD-LABEL: vararg_many_float:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #112
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT: add x29, sp, #96
+; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset w29, -16
+; CHECK-SD-NEXT: str q7, [sp]
+; CHECK-SD-NEXT: str x7, [sp, #80]
+; CHECK-SD-NEXT: str x6, [sp, #72]
+; CHECK-SD-NEXT: str x5, [sp, #64]
+; CHECK-SD-NEXT: str x4, [sp, #56]
+; CHECK-SD-NEXT: str x3, [sp, #48]
+; CHECK-SD-NEXT: str x2, [sp, #40]
+; CHECK-SD-NEXT: str x1, [sp, #32]
+; CHECK-SD-NEXT: str x0, [sp, #24]
+; CHECK-SD-NEXT: mov w8, #-16 // =0xfffffff0
+; CHECK-SD-NEXT: str w8, [x29, #20]
+; CHECK-SD-NEXT: mov w8, #-64 // =0xffffffc0
+; CHECK-SD-NEXT: str w8, [x29, #16]
+; CHECK-SD-NEXT: add x8, x29, #16
+; CHECK-SD-NEXT: stur x8, [x29, #-8]
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-NEXT: add x8, x8, #16
+; CHECK-SD-NEXT: str x8, [x29, #8]
+; CHECK-SD-NEXT: add x8, sp, #24
+; CHECK-SD-NEXT: add x8, x8, #64
+; CHECK-SD-NEXT: str x8, [x29]
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: .cfi_def_cfa wsp, 112
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #112
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT: .cfi_restore w30
+; CHECK-SD-NEXT: .cfi_restore w29
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vararg_many_float:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #112
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT: add x29, sp, #96
+; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset w29, -16
+; CHECK-GI-NEXT: str x0, [sp, #32]
+; CHECK-GI-NEXT: str x1, [sp, #40]
+; CHECK-GI-NEXT: str x2, [sp, #48]
+; CHECK-GI-NEXT: str x3, [sp, #56]
+; CHECK-GI-NEXT: str x4, [sp, #64]
+; CHECK-GI-NEXT: str x5, [sp, #72]
+; CHECK-GI-NEXT: str x6, [sp, #80]
+; CHECK-GI-NEXT: str x7, [sp, #88]
+; CHECK-GI-NEXT: str q7, [sp, #16]
+; CHECK-GI-NEXT: add x9, sp, #8
+; CHECK-GI-NEXT: add x8, x29, #16
+; CHECK-GI-NEXT: str x8, [x9]
+; CHECK-GI-NEXT: add x8, sp, #96
+; CHECK-GI-NEXT: str x8, [x9, #8]
+; CHECK-GI-NEXT: add x8, sp, #32
+; CHECK-GI-NEXT: str x8, [x9, #16]
+; CHECK-GI-NEXT: mov w8, #-64 // =0xffffffc0
+; CHECK-GI-NEXT: str w8, [x9, #24]
+; CHECK-GI-NEXT: mov w8, #-16 // =0xfffffff0
+; CHECK-GI-NEXT: str w8, [x9, #28]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: .cfi_def_cfa wsp, 112
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #112
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT: .cfi_restore w30
+; CHECK-GI-NEXT: .cfi_restore w29
+; CHECK-GI-NEXT: ret
+entry:
+ %g = alloca ptr, align 4
+ call void @llvm.va_start(ptr %g)
+ ret i64 1
+}
+
+define i64 @gpr1_fpr1(i32 %i, float %f, ...) #0 {
+; CHECK-SD-LABEL: gpr1_fpr1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #192
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 192
+; CHECK-SD-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
+; CHECK-SD-NEXT: add x29, sp, #176
+; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset w29, -16
+; CHECK-SD-NEXT: str q7, [sp, #96]
+; CHECK-SD-NEXT: str q6, [sp, #80]
+; CHECK-SD-NEXT: str q5, [sp, #64]
+; CHECK-SD-NEXT: str q4, [sp, #48]
+; CHECK-SD-NEXT: str q3, [sp, #32]
+; CHECK-SD-NEXT: str q2, [sp, #16]
+; CHECK-SD-NEXT: str q1, [sp]
+; CHECK-SD-NEXT: stur x7, [x29, #-16]
+; CHECK-SD-NEXT: stur x6, [x29, #-24]
+; CHECK-SD-NEXT: stur x5, [x29, #-32]
+; CHECK-SD-NEXT: stur x4, [x29, #-40]
+; CHECK-SD-NEXT: stur x3, [x29, #-48]
+; CHECK-SD-NEXT: stur x2, [x29, #-56]
+; CHECK-SD-NEXT: stur x1, [x29, #-64]
+; CHECK-SD-NEXT: mov w8, #-112 // =0xffffff90
+; CHECK-SD-NEXT: str w8, [x29, #20]
+; CHECK-SD-NEXT: mov w8, #-56 // =0xffffffc8
+; CHECK-SD-NEXT: str w8, [x29, #16]
+; CHECK-SD-NEXT: add x8, x29, #16
+; CHECK-SD-NEXT: stur x8, [x29, #-8]
+; CHECK-SD-NEXT: mov x8, sp
+; CHECK-SD-NEXT: add x8, x8, #112
+; CHECK-SD-NEXT: str x8, [x29, #8]
+; CHECK-SD-NEXT: sub x8, x29, #64
+; CHECK-SD-NEXT: add x8, x8, #56
+; CHECK-SD-NEXT: str x8, [x29]
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: .cfi_def_cfa wsp, 192
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #192
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT: .cfi_restore w30
+; CHECK-SD-NEXT: .cfi_restore w29
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: gpr1_fpr1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #208
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 208
+; CHECK-GI-NEXT: stp x29, x30, [sp, #192] // 16-byte Folded Spill
+; CHECK-GI-NEXT: add x29, sp, #192
+; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset w29, -16
+; CHECK-GI-NEXT: stur x1, [x29, #-56]
+; CHECK-GI-NEXT: stur x2, [x29, #-48]
+; CHECK-GI-NEXT: stur x3, [x29, #-40]
+; CHECK-GI-NEXT: stur x4, [x29, #-32]
+; CHECK-GI-NEXT: stur x5, [x29, #-24]
+; CHECK-GI-NEXT: stur x6, [x29, #-16]
+; CHECK-GI-NEXT: stur x7, [x29, #-8]
+; CHECK-GI-NEXT: str q1, [sp, #16]
+; CHECK-GI-NEXT: str q2, [sp, #32]
+; CHECK-GI-NEXT: str q3, [sp, #48]
+; CHECK-GI-NEXT: str q4, [sp, #64]
+; CHECK-GI-NEXT: str q5, [sp, #80]
+; CHECK-GI-NEXT: str q6, [sp, #96]
+; CHECK-GI-NEXT: str q7, [sp, #112]
+; CHECK-GI-NEXT: add x9, sp, #8
+; CHECK-GI-NEXT: add x8, x29, #16
+; CHECK-GI-NEXT: str x8, [x9]
+; CHECK-GI-NEXT: add x8, x29, #0
+; CHECK-GI-NEXT: str x8, [x9, #8]
+; CHECK-GI-NEXT: add x8, sp, #128
+; CHECK-GI-NEXT: str x8, [x9, #16]
+; CHECK-GI-NEXT: mov w8, #-56 // =0xffffffc8
+; CHECK-GI-NEXT: str w8, [x9, #24]
+; CHECK-GI-NEXT: mov w8, #-112 // =0xffffff90
+; CHECK-GI-NEXT: str w8, [x9, #28]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: .cfi_def_cfa wsp, 208
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #208
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT: .cfi_restore w30
+; CHECK-GI-NEXT: .cfi_restore w29
+; CHECK-GI-NEXT: ret
+entry:
+ %g = alloca ptr, align 4
+ call void @llvm.va_start(ptr %g)
+ ret i64 1
+}
+
+; A real program case
+define float @vararg_program(ptr %a, ...) #0 {
+; CHECK-SD-LABEL: vararg_program:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #272
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 272
+; CHECK-SD-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-SD-NEXT: add x29, sp, #256
+; CHECK-SD-NEXT: .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset w29, -16
+; CHECK-SD-NEXT: str q7, [sp, #144]
+; CHECK-SD-NEXT: str q6, [sp, #128]
+; CHECK-SD-NEXT: str q5, [sp, #112]
+; CHECK-SD-NEXT: str q4, [sp, #96]
+; CHECK-SD-NEXT: str q3, [sp, #80]
+; CHECK-SD-NEXT: str q2, [sp, #64]
+; CHECK-SD-NEXT: str q1, [sp, #48]
+; CHECK-SD-NEXT: str q0, [sp, #32]
+; CHECK-SD-NEXT: stur x7, [x29, #-40]
+; CHECK-SD-NEXT: stur x6, [x29, #-48]
+; CHECK-SD-NEXT: stur x5, [x29, #-56]
+; CHECK-SD-NEXT: stur x4, [x29, #-64]
+; CHECK-SD-NEXT: stur x3, [x29, #-72]
+; CHECK-SD-NEXT: stur x2, [x29, #-80]
+; CHECK-SD-NEXT: stur x1, [x29, #-88]
+; CHECK-SD-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT: stur w8, [x29, #-4]
+; CHECK-SD-NEXT: mov w8, #-56 // =0xffffffc8
+; CHECK-SD-NEXT: stur w8, [x29, #-8]
+; CHECK-SD-NEXT: add x8, x29, #16
+; CHECK-SD-NEXT: stur x8, [x29, #-32]
+; CHECK-SD-NEXT: add x8, sp, #32
+; CHECK-SD-NEXT: add x8, x8, #128
+; CHECK-SD-NEXT: stur x8, [x29, #-16]
+; CHECK-SD-NEXT: sub x8, x29, #88
+; CHECK-SD-NEXT: add x8, x8, #56
+; CHECK-SD-NEXT: stur x8, [x29, #-24]
+; CHECK-SD-NEXT: sub x8, x29, #32
+; CHECK-SD-NEXT: add x8, x8, #28
+; CHECK-SD-NEXT: str x8, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: ldur w8, [x29, #-4]
+; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Folded Spill
+; CHECK-SD-NEXT: tbz w8, #31, .LBB4_3
+; CHECK-SD-NEXT: // %bb.1: // %vaarg.maybe_reg
+; CHECK-SD-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload
+; CHECK-SD-NEXT: ldr x10, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add w9, w8, #16
+; CHECK-SD-NEXT: str w9, [x10]
+; CHECK-SD-NEXT: mov w9, #-15 // =0xfffffff1
+; CHECK-SD-NEXT: cmp w8, w9
+; CHECK-SD-NEXT: b.hs .LBB4_3
+; CHECK-SD-NEXT: // %bb.2: // %vaarg.in_reg
+; CHECK-SD-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload
+; CHECK-SD-NEXT: ldur x8, [x29, #-16]
+; CHECK-SD-NEXT: // kill: def $x9 killed $w9
+; CHECK-SD-NEXT: sxtw x9, w9
+; CHECK-SD-NEXT: add x8, x8, x9
+; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: b .LBB4_4
+; CHECK-SD-NEXT: .LBB4_3: // %vaarg.on_stack
+; CHECK-SD-NEXT: ldur x8, [x29, #-32]
+; CHECK-SD-NEXT: add x9, x8, #8
+; CHECK-SD-NEXT: stur x9, [x29, #-32]
+; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .LBB4_4: // %vaarg.end
+; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-SD-NEXT: ldr d0, [x8]
+; CHECK-SD-NEXT: fcvt s0, d0
+; CHECK-SD-NEXT: .cfi_def_cfa wsp, 272
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #272
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT: .cfi_restore w30
+; CHECK-SD-NEXT: .cfi_restore w29
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vararg_program:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #256
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 256
+; CHECK-GI-NEXT: stp x29, x30, [sp, #240] // 16-byte Folded Spill
+; CHECK-GI-NEXT: add x29, sp, #240
+; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset w29, -16
+; CHECK-GI-NEXT: stur x1, [x29, #-56]
+; CHECK-GI-NEXT: stur x2, [x29, #-48]
+; CHECK-GI-NEXT: stur x3, [x29, #-40]
+; CHECK-GI-NEXT: stur x4, [x29, #-32]
+; CHECK-GI-NEXT: stur x5, [x29, #-24]
+; CHECK-GI-NEXT: stur x6, [x29, #-16]
+; CHECK-GI-NEXT: stur x7, [x29, #-8]
+; CHECK-GI-NEXT: str q0, [sp, #48]
+; CHECK-GI-NEXT: str q1, [sp, #64]
+; CHECK-GI-NEXT: str q2, [sp, #80]
+; CHECK-GI-NEXT: str q3, [sp, #96]
+; CHECK-GI-NEXT: str q4, [sp, #112]
+; CHECK-GI-NEXT: str q5, [sp, #128]
+; CHECK-GI-NEXT: str q6, [sp, #144]
+; CHECK-GI-NEXT: str q7, [sp, #160]
+; CHECK-GI-NEXT: add x9, sp, #16
+; CHECK-GI-NEXT: add x8, x29, #16
+; CHECK-GI-NEXT: str x8, [x9]
+; CHECK-GI-NEXT: add x8, x29, #0
+; CHECK-GI-NEXT: str x8, [x9, #8]
+; CHECK-GI-NEXT: add x8, sp, #176
+; CHECK-GI-NEXT: str x8, [x9, #16]
+; CHECK-GI-NEXT: mov w8, #-56 // =0xffffffc8
+; CHECK-GI-NEXT: str w8, [x9, #24]
+; CHECK-GI-NEXT: mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT: str w8, [x9, #28]
+; CHECK-GI-NEXT: ldrsw x8, [sp, #44]
+; CHECK-GI-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
+; CHECK-GI-NEXT: tbz w8, #31, .LBB4_3
+; CHECK-GI-NEXT: b .LBB4_1
+; CHECK-GI-NEXT: .LBB4_1: // %vaarg.maybe_reg
+; CHECK-GI-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add w9, w8, #16
+; CHECK-GI-NEXT: str w9, [sp, #44]
+; CHECK-GI-NEXT: adds w8, w8, #15
+; CHECK-GI-NEXT: b.hs .LBB4_3
+; CHECK-GI-NEXT: b .LBB4_2
+; CHECK-GI-NEXT: .LBB4_2: // %vaarg.in_reg
+; CHECK-GI-NEXT: ldr x9, [sp, #8] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr x8, [sp, #32]
+; CHECK-GI-NEXT: add x8, x8, x9
+; CHECK-GI-NEXT: str x8, [sp] // 8-byte Folded Spill
+; CHECK-GI-NEXT: b .LBB4_4
+; CHECK-GI-NEXT: .LBB4_3: // %vaarg.on_stack
+; CHECK-GI-NEXT: ldr x8, [sp, #16]
+; CHECK-GI-NEXT: mov x9, x8
+; CHECK-GI-NEXT: add x9, x9, #8
+; CHECK-GI-NEXT: str x9, [sp, #16]
+; CHECK-GI-NEXT: str x8, [sp] // 8-byte Folded Spill
+; CHECK-GI-NEXT: b .LBB4_4
+; CHECK-GI-NEXT: .LBB4_4: // %vaarg.end
+; CHECK-GI-NEXT: ldr x8, [sp] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d0, [x8]
+; CHECK-GI-NEXT: fcvt s0, d0
+; CHECK-GI-NEXT: .cfi_def_cfa wsp, 256
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #240] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #256
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT: .cfi_restore w30
+; CHECK-GI-NEXT: .cfi_restore w29
+; CHECK-GI-NEXT: ret
+entry:
+ %ap = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap) #3
+ call void @llvm.va_start.p0(ptr nonnull %ap)
+ %vr_offs_p = getelementptr inbounds i8, ptr %ap, i64 28
+ %vr_offs = load i32, ptr %vr_offs_p, align 4
+ %0 = icmp sgt i32 %vr_offs, -1
+ br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+vaarg.maybe_reg: ; preds = %entry
+ %new_reg_offs = add nsw i32 %vr_offs, 16
+ store i32 %new_reg_offs, ptr %vr_offs_p, align 4
+ %inreg = icmp ult i32 %vr_offs, -15
+ br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+vaarg.in_reg: ; preds = %vaarg.maybe_reg
+ %reg_top_p = getelementptr inbounds i8, ptr %ap, i64 16
+ %reg_top = load ptr, ptr %reg_top_p, align 8
+ %1 = sext i32 %vr_offs to i64
+ %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
+ br label %vaarg.end
+
+vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
+ %stack = load ptr, ptr %ap, align 8
+ %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+ store ptr %new_stack, ptr %ap, align 8
+ br label %vaarg.end
+
+vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
+ %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+ %3 = load double, ptr %vaargs.addr, align 8
+ %conv = fptrunc double %3 to float
+ call void @llvm.va_end.p0(ptr nonnull %ap)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
+ ret float %conv
+}
+
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From e121c29d421ed0a06c03ee4b699e926a8c51cf6a Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Wed, 4 Sep 2024 10:14:40 +0100
Subject: [PATCH 2/3] Use STI from AArch64InstructionSelector
---
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 29d1392b51090c..af40d7f6f8da0b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1995,7 +1995,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
+ if (STI.isCallingConvWin64(
MF.getFunction().getCallingConv(), MF.getFunction().isVarArg()))
return false;
>From 0910b49d02d6e0b833959a3b01a8b6dade72265b Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Thu, 5 Sep 2024 16:20:18 +0100
Subject: [PATCH 3/3] Use commonAlignment
---
.../lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index af40d7f6f8da0b..bfed006a11a687 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2045,7 +2045,8 @@ bool AArch64InstructionSelector::selectVaStartAAPCS(
(*I.memoperands_begin())
->getPointerInfo()
.getWithOffset(OffsetBytes),
- MachineMemOperand::MOStore, PtrSize, Align(PtrSize)));
+ MachineMemOperand::MOStore, PtrSize,
+ commonAlignment(Align(PtrSize), OffsetBytes)));
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
OffsetBytes += PtrSize;
@@ -2081,7 +2082,8 @@ bool AArch64InstructionSelector::selectVaStartAAPCS(
(*I.memoperands_begin())
->getPointerInfo()
.getWithOffset(OffsetBytes),
- MachineMemOperand::MOStore, IntSize, Align(IntSize)));
+ MachineMemOperand::MOStore, IntSize,
+ commonAlignment(Align(IntSize), OffsetBytes)));
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
OffsetBytes += IntSize;
};
More information about the llvm-commits
mailing list