[llvm] [AArch64][GlobalISel] Implement selectVaStartAAPCS (PR #106979)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 2 05:29:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Him188 (Him188)
<details>
<summary>Changes</summary>
This commit adds the missing support for varargs in the instruction selection pass for AAPCS. Previously we only implemented this for Darwin.
The implementation follows the AAPCS and mirrors SelectionDAG's LowerAAPCS_VASTART.
It resolves all VA_START fallbacks in RAJAperf, llvm-test-suite, and SPEC CPU2017. These benchmarks now compile and pass without any fallbacks caused by varargs.
---
Patch is 42.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106979.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+102-1)
- (added) llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir (+325)
- (added) llvm/test/CodeGen/AArch64/vararg.ll (+569)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index e9e6b6cb68d0d1..29d1392b51090c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1994,7 +1994,108 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- return false;
+
+ if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
+ MF.getFunction().getCallingConv(), MF.getFunction().isVarArg()))
+ return false;
+
+ // The layout of the va_list struct is specified in the AArch64 Procedure Call
+ // Standard, section 10.1.5.
+
+ const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
+ const auto *PtrRegClass =
+ STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+
+ const MCInstrDesc &MCIDAddAddr =
+ TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
+ const MCInstrDesc &MCIDStoreAddr =
+ TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
+
+ /*
+ * typedef struct va_list {
+ * void * stack; // next stack param
+ * void * gr_top; // end of GP arg reg save area
+ * void * vr_top; // end of FP/SIMD arg reg save area
+ * int gr_offs; // offset from gr_top to next GP register arg
+ * int vr_offs; // offset from vr_top to next FP/SIMD register arg
+ * } va_list;
+ */
+ const auto VAList = I.getOperand(0).getReg();
+
+ // Our current offset in bytes from the va_list struct (VAList).
+ unsigned OffsetBytes = 0;
+
+ // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
+ // and increment OffsetBytes by PtrSize.
+ const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
+ const Register Top = MRI.createVirtualRegister(PtrRegClass);
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
+ .addDef(Top)
+ .addFrameIndex(FrameIndex)
+ .addImm(Imm)
+ .addImm(0);
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
+ .addUse(Top)
+ .addUse(VAList)
+ .addImm(OffsetBytes / PtrSize)
+ .addMemOperand(MF.getMachineMemOperand(
+ (*I.memoperands_begin())
+ ->getPointerInfo()
+ .getWithOffset(OffsetBytes),
+ MachineMemOperand::MOStore, PtrSize, Align(PtrSize)));
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ OffsetBytes += PtrSize;
+ };
+
+ // void* stack at offset 0
+ PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
+
+ // void* gr_top at offset 8 (4 on ILP32)
+ const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
+ PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
+
+ // void* vr_top at offset 16 (8 on ILP32)
+ const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
+ PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
+
+ // Helper function to store a 4-byte integer constant to VAList at offset
+ // OffsetBytes, and increment OffsetBytes by 4.
+ const auto PushIntConstant = [&](const int32_t Value) {
+ constexpr int IntSize = 4;
+ const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ auto MIB =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
+ .addDef(Temp)
+ .addImm(Value);
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
+ .addUse(Temp)
+ .addUse(VAList)
+ .addImm(OffsetBytes / IntSize)
+ .addMemOperand(MF.getMachineMemOperand(
+ (*I.memoperands_begin())
+ ->getPointerInfo()
+ .getWithOffset(OffsetBytes),
+ MachineMemOperand::MOStore, IntSize, Align(IntSize)));
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ OffsetBytes += IntSize;
+ };
+
+ // int gr_offs at offset 24 (12 on ILP32)
+ PushIntConstant(-static_cast<int32_t>(GPRSize));
+
+ // int vr_offs at offset 28 (16 on ILP32)
+ PushIntConstant(-static_cast<int32_t>(FPRSize));
+
+ assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
+
+ I.eraseFromParent();
+ return true;
}
bool AArch64InstructionSelector::selectVaStartDarwin(
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
new file mode 100644
index 00000000000000..50911582eef0bc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
@@ -0,0 +1,325 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64-unknown-linux -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0
+
+ declare void @llvm.va_start.p0(ptr) #1
+
+ declare void @llvm.va_end.p0(ptr) #1
+
+ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
+
+ define float @vararg(ptr %a, ...) #2 {
+ entry:
+ %ap = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap)
+ call void @llvm.va_start.p0(ptr nonnull %ap)
+ %vr_offs_p = getelementptr inbounds i8, ptr %ap, i64 28
+ %vr_offs = load i32, ptr %vr_offs_p, align 4
+ %0 = sext i32 %vr_offs to i64
+ %1 = icmp sgt i32 %vr_offs, -1
+ br i1 %1, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+ vaarg.maybe_reg: ; preds = %entry
+ %2 = trunc i64 %0 to i32
+ %3 = trunc i64 %0 to i32
+ %new_reg_offs = add nsw i32 %2, 16
+ %sunkaddr = getelementptr inbounds i8, ptr %ap, i64 28
+ store i32 %new_reg_offs, ptr %sunkaddr, align 4
+ %inreg = icmp ult i32 %3, -15
+ br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+ vaarg.in_reg: ; preds = %vaarg.maybe_reg
+ %reg_top_p = getelementptr inbounds i8, ptr %ap, i64 16
+ %reg_top = load ptr, ptr %reg_top_p, align 8
+ %4 = getelementptr inbounds i8, ptr %reg_top, i64 %0
+ br label %vaarg.end
+
+ vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
+ %stack = load ptr, ptr %ap, align 8
+ %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+ store ptr %new_stack, ptr %ap, align 8
+ br label %vaarg.end
+
+ vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
+ %vaargs.addr = phi ptr [ %4, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+ %5 = load double, ptr %vaargs.addr, align 8
+ %conv = fptrunc double %5 to float
+ call void @llvm.va_end.p0(ptr nonnull %ap)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
+ ret float %conv
+ }
+
+ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) "target-cpu"="neoverse-v2" }
+ attributes #1 = { nocallback nofree nosync nounwind willreturn "target-cpu"="neoverse-v2" }
+ attributes #2 = { uwtable "frame-pointer"="all" "target-cpu"="neoverse-v2" }
+
+...
+---
+name: vararg
+alignment: 16
+exposesReturnsTwice: false
+legalized: true
+regBankSelected: true
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+liveins:
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 16
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+ - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
+ isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 56, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 0, size: 128, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: ap, type: default, offset: 0, size: 32, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: vararg
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000)
+ ; CHECK-NEXT: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: STRXui [[COPY]], %stack.0, 0 :: (store (s64) into stack + 8, align 1)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: STRXui [[COPY1]], %stack.0, 1 :: (store (s64) into stack + 16, align 1)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: STRXui [[COPY2]], %stack.0, 2 :: (store (s64) into stack + 24, align 1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+ ; CHECK-NEXT: STRXui [[COPY3]], %stack.0, 3 :: (store (s64) into stack + 32, align 1)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+ ; CHECK-NEXT: STRXui [[COPY4]], %stack.0, 4 :: (store (s64) into stack + 40, align 1)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+ ; CHECK-NEXT: STRXui [[COPY5]], %stack.0, 5 :: (store (s64) into stack + 48, align 1)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64 = COPY $x7
+ ; CHECK-NEXT: STRXui [[COPY6]], %stack.0, 6 :: (store (s64) into stack + 56, align 1)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: STRQui [[COPY7]], %stack.1, 0 :: (store (s128) into stack, align 1)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:fpr128 = COPY $q1
+ ; CHECK-NEXT: STRQui [[COPY8]], %stack.1, 1 :: (store (s128) into stack + 16, align 1)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:fpr128 = COPY $q2
+ ; CHECK-NEXT: STRQui [[COPY9]], %stack.1, 2 :: (store (s128) into stack + 32, align 1)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:fpr128 = COPY $q3
+ ; CHECK-NEXT: STRQui [[COPY10]], %stack.1, 3 :: (store (s128) into stack + 48, align 1)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:fpr128 = COPY $q4
+ ; CHECK-NEXT: STRQui [[COPY11]], %stack.1, 4 :: (store (s128) into stack + 64, align 1)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:fpr128 = COPY $q5
+ ; CHECK-NEXT: STRQui [[COPY12]], %stack.1, 5 :: (store (s128) into stack + 80, align 1)
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:fpr128 = COPY $q6
+ ; CHECK-NEXT: STRQui [[COPY13]], %stack.1, 6 :: (store (s128) into stack + 96, align 1)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:fpr128 = COPY $q7
+ ; CHECK-NEXT: STRQui [[COPY14]], %stack.1, 7 :: (store (s128) into stack + 112, align 1)
+ ; CHECK-NEXT: LIFETIME_START %stack.2.ap
+ ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.2.ap, 0, 0
+ ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+ ; CHECK-NEXT: STRXui [[ADDXri1]], [[ADDXri]], 0 :: (store (s64) into %ir.ap)
+ ; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+ ; CHECK-NEXT: STRXui [[ADDXri2]], [[ADDXri]], 1 :: (store (s64) into %ir.ap + 8)
+ ; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+ ; CHECK-NEXT: STRXui [[ADDXri3]], [[ADDXri]], 2 :: (store (s64) into %ir.ap + 16)
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+ ; CHECK-NEXT: STRWui [[MOVi32imm]], [[ADDXri]], 6 :: (store (s32) into %ir.ap + 24)
+ ; CHECK-NEXT: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 0
+ ; CHECK-NEXT: STRWui [[MOVi32imm1]], [[ADDXri]], 7 :: (store (s32) into %ir.ap + 28)
+ ; CHECK-NEXT: [[LDRSWui:%[0-9]+]]:gpr64common = LDRSWui %stack.2.ap, 7 :: (dereferenceable load (s32) from %ir.vr_offs_p)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:gpr32common = COPY [[LDRSWui]].sub_32
+ ; CHECK-NEXT: TBZW [[COPY15]], 31, %bb.3
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.vaarg.maybe_reg:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:gpr32sp = COPY [[LDRSWui]].sub_32
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = nsw ADDWri [[COPY16]], 16, 0
+ ; CHECK-NEXT: STRWui [[ADDWri]], %stack.2.ap, 7 :: (store (s32) into %ir.sunkaddr)
+ ; CHECK-NEXT: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri [[COPY16]], 15, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 2, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.vaarg.in_reg:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.2.ap, 2 :: (dereferenceable load (p0) from %ir.reg_top_p)
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRSWui]]
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.vaarg.on_stack:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui %stack.2.ap, 0 :: (dereferenceable load (p0) from %ir.ap)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:gpr64common = COPY [[LDRXui1]]
+ ; CHECK-NEXT: [[ADDXri4:%[0-9]+]]:gpr64sp = ADDXri [[COPY17]], 8, 0
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:gpr64 = COPY [[ADDXri4]]
+ ; CHECK-NEXT: STRXui [[COPY18]], %stack.2.ap, 0 :: (store (p0) into %ir.ap)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.vaarg.end:
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr64sp = PHI [[ADDXrr]], %bb.2, [[LDRXui1]], %bb.3
+ ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[PHI]], 0 :: (load (s64) from %ir.vaargs.addr)
+ ; CHECK-NEXT: [[FCVTSDr:%[0-9]+]]:fpr32 = nofpexcept FCVTSDr [[LDRDui]], implicit $fpcr
+ ; CHECK-NEXT: LIFETIME_END %stack.2.ap
+ ; CHECK-NEXT: $s0 = COPY [[FCVTSDr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
+ bb.1.entry:
+ successors: %bb.4(0x50000000), %bb.2(0x30000000)
+ liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+
+ %3:gpr(s64) = COPY $x1
+ %1:gpr(p0) = G_FRAME_INDEX %stack.0
+ G_STORE %3(s64), %1(p0) :: (store (s64) into stack + 8, align 1)
+ %5:gpr(s64) = COPY $x2
+ %2:gpr(s64) = G_CONSTANT i64 8
+ %4:gpr(p0) = G_PTR_ADD %1, %2(s64)
+ G_STORE %5(s64), %4(p0) :: (store (s64) into stack + 16, align 1)
+ %7:gpr(s64) = COPY $x3
+ %18:gpr(s64) = G_CONSTANT i64 16
+ %6:gpr(p0) = G_PTR_ADD %1, %18(s64)
+ G_STORE %7(s64), %6(p0) :: (store (s64) into stack + 24, align 1)
+ %9:gpr(s64) = COPY $x4
+ %61:gpr(s64) = G_CONSTANT i64 24
+ %8:gpr(p0) = G_PTR_ADD %1, %61(s64)
+ G_STORE %9(s64), %8(p0) :: (store (s64) into stack + 32, align 1)
+ %11:gpr(s64) = COPY $x5
+ %62:gpr(s64) = G_CONSTANT i64 32
+ %10:gpr(p0) = G_PTR_ADD %1, %62(s64)
+ G_STORE %11(s64), %10(p0) :: (store (s64) into stack + 40, align 1)
+ %13:gpr(s64) = COPY $x6
+ %63:gpr(s64) = G_CONSTANT i64 40
+ %12:gpr(p0) = G_PTR_ADD %1, %63(s64)
+ G_STORE %13(s64), %12(p0) :: (store (s64) into stack + 48, align 1)
+ %15:gpr(s64) = COPY $x7
+ %64:gpr(s64) = G_CONSTANT i64 48
+ %14:gpr(p0) = G_PTR_ADD %1, %64(s64)
+ G_STORE %15(s64), %14(p0) :: (store (s64) into stack + 56, align 1)
+ %19:fpr(s128) = COPY $q0
+ %17:gpr(p0) = G_FRAME_INDEX %stack.1
+ G_STORE %19(s128), %17(p0) :: (store (s128) into stack, align 1)
+ %21:fpr(s128) = COPY $q1
+ %20:gpr(p0) = G_PTR_ADD %17, %18(s64)
+ G_STORE %21(s128), %20(p0) :: (store (s128) into stack + 16, align 1)
+ %23:fpr(s128) = COPY $q2
+ %22:gpr(p0) = G_PTR_ADD %17, %62(s64)
+ G_STORE %23(s128), %22(p0) :: (store (s128) into stack + 32, align 1)
+ %25:fpr(s128) = COPY $q3
+ %24:gpr(p0) = G_PTR_ADD %17, %64(s64)
+ G_STORE %25(s128), %24(p0) :: (store (s128) into stack + 48, align 1)
+ %27:fpr(s128) = COPY $q4
+ %65:gpr(s64) = G_CONSTANT i64 64
+ %26:gpr(p0) = G_PTR_ADD %17, %65(s64)
+ G_STORE %27(s128), %26(p0) :: (store (s128) into stack + 64, align 1)
+ %29:fpr(s128) = COPY $q5
+ %66:gpr(s64) = G_CONSTANT i64 80
+ %28:gpr(p0) = G_PTR_ADD %17, %66(s64)
+ G_STORE %29(s128), %28(p0) :: (store (s128) into stack + 80, align 1)
+ %31:fpr(s128) = COPY $q6
+ %67:gpr(s64) = G_CONSTANT i64 96
+ %30:gpr(p0) = G_PTR_ADD %17, %67(s64)
+ G_STORE %31(s128), %30(p0) :: (store (s128) into stack + 96, align 1)
+ %33:fpr(s128) = COPY $q7
+ %68:gpr(s64) = G_CONSTANT i64 112
+ %32:gpr(p0) = G_PTR_ADD %17, %68(s64)
+ G_STORE %33(s128), %32(p0) :: (store (s128) into stack + 112, align 1)
+ LIFETIME_START %stack.2.ap
+ %35:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ G_VASTART %35(p0) :: (store (s256) into %ir.ap, align 8)
+ %36:gpr(s64) = G_CONSTANT i64 28
+ %37:gpr(p0) = G_PTR_ADD %35, %36(s64)
+ %39:gpr(s64) = G_SEXTLOAD %37(p0) :: (dereferenceable load (s32) from %ir.vr_offs_p)
+ %69:gpr(s32) = G_TRUNC %39(s64)
+ %81:gpr(s32) = G_CONSTANT i32 0
+ %80:gpr(s32) = G_ICMP intpred(sge), %69(s32), %81
+ G_BRCOND %80(s32), %bb.4
+ G_BR %bb.2
+
+ bb.2.vaarg.maybe_reg:
+ successors: %bb.3(0x40000000), %bb.4(0x40000000)
+
+ %42:gpr(s32) = G_TRUNC %39(s64)
+ %76:gpr(s32) = G_CONSTANT i32 16
+ %45:gpr(s32) = nsw G_ADD %42, %76
+ %46:gpr(s64) = G_CONSTANT i64 28
+ %72:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ %47:gpr(p0) = G_PTR_ADD %72, %46(s64)
+ G_STORE %45(s32), %47(p0) :: (store (s32) into %ir.sunkaddr)
+ %75:gpr(s32) = G_CONSTANT i32 -15
+ %78:gpr(s32) = G_ICMP intpred(uge), %42(s32), %75
+ G_BRCOND %78(s32), %bb.4
+ G_BR %bb.3
+
+ bb.3.vaarg.in_reg:
+ successors: %bb.5(0x80000000)
+
+ %50:gpr(s64) = G_CONSTANT i64 16
+ %73:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ %51:gpr(p0) = G_PTR_ADD %73, %50(s64)
+ %52:gpr(p0) = G_LOAD %51(p0) :: (dereferenceable load (p0) from %ir.reg_top_p)
+ %53:gpr(p0) = G_PTR_ADD %52, %39(s64)
+ G_BR %bb.5
+
+ bb.4.vaarg.on_stack:
+ successors: %bb.5(0x80000000)
+
+ %74:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+ %55:gpr(p0) = G_LOAD %74(p0) :: (dereferenceable load (p0) from %ir.ap)
+ %56:gpr(s64) = G_CONSTANT i64 8
+ %57:gpr(p0) = G_PTR_ADD %55, %56(s64)
+ G_STORE %57(p0), %74(p0) :: (store (p0) into %ir.ap)
+
+ bb.5.vaarg.end:
+ %58:gpr(p0) = G_PHI %53(p0), %bb.3, %55(p0), %bb.4
+ %59:fpr(s64) = G_LOAD %58(p0) :: (load (s64) from %ir.vaargs.addr)
+ %60:fpr(s32) = G_FPTRUNC %59(s64)
+ LIFETIME_END %stack.2.ap
+ $s0 = COPY %60(s32)
+ RET_ReallyLR implicit $s0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/vararg.ll b/llvm/test/CodeGen/AArch64/vararg.ll
new file mode 100644
index 00000000000000..fa9cdf4fc1bcaa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vararg.ll
@@ -0,0 +1,569 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O0 -gl...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/106979
More information about the llvm-commits
mailing list