[llvm] 77af9d1 - [AArch64][GlobalISel] Implement selectVaStartAAPCS (#106979)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 18 23:18:18 PDT 2024


Author: Him188
Date: 2024-09-19T11:48:14+05:30
New Revision: 77af9d10237fef194eb275f33a11daea88e304a4

URL: https://github.com/llvm/llvm-project/commit/77af9d10237fef194eb275f33a11daea88e304a4
DIFF: https://github.com/llvm/llvm-project/commit/77af9d10237fef194eb275f33a11daea88e304a4.diff

LOG: [AArch64][GlobalISel] Implement selectVaStartAAPCS (#106979)

This commit adds the missing support for varargs in the instruction
selection pass for AAPCS. Previously we only implemented this for
Darwin.

The implementation follows the AAPCS and mirrors SelectionDAG's
LowerAAPCS_VASTART.

It resolves all VA_START fallbacks in RAJAperf, llvm-test-suite, and
SPEC CPU2017. These benchmarks now compile and pass without any
fallbacks caused by varargs.

---------

Co-authored-by: Madhur Amilkanthwar <madhura at nvidia.com>

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
    llvm/test/CodeGen/AArch64/vararg.ll

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 18361cf3685642..df0c09d32c074a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1994,7 +1994,106 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
 
 bool AArch64InstructionSelector::selectVaStartAAPCS(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
-  return false;
+
+  if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                             MF.getFunction().isVarArg()))
+    return false;
+
+  // The layout of the va_list struct is specified in the AArch64 Procedure Call
+  // Standard, section 10.1.5.
+
+  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
+  const auto *PtrRegClass =
+      STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+
+  const MCInstrDesc &MCIDAddAddr =
+      TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
+  const MCInstrDesc &MCIDStoreAddr =
+      TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
+
+  /*
+   * typedef struct va_list {
+   *  void * stack; // next stack param
+   *  void * gr_top; // end of GP arg reg save area
+   *  void * vr_top; // end of FP/SIMD arg reg save area
+   *  int gr_offs; // offset from gr_top to next GP register arg
+   *  int vr_offs; // offset from vr_top to next FP/SIMD register arg
+   * } va_list;
+   */
+  const auto VAList = I.getOperand(0).getReg();
+
+  // Our current offset in bytes from the va_list struct (VAList).
+  unsigned OffsetBytes = 0;
+
+  // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
+  // and increment OffsetBytes by PtrSize.
+  const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
+    const Register Top = MRI.createVirtualRegister(PtrRegClass);
+    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
+                   .addDef(Top)
+                   .addFrameIndex(FrameIndex)
+                   .addImm(Imm)
+                   .addImm(0);
+    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+    const auto *MMO = *I.memoperands_begin();
+    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
+              .addUse(Top)
+              .addUse(VAList)
+              .addImm(OffsetBytes / PtrSize)
+              .addMemOperand(MF.getMachineMemOperand(
+                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
+                  MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
+    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+    OffsetBytes += PtrSize;
+  };
+
+  // void* stack at offset 0
+  PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
+
+  // void* gr_top at offset 8 (4 on ILP32)
+  const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
+  PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
+
+  // void* vr_top at offset 16 (8 on ILP32)
+  const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
+  PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
+
+  // Helper function to store a 4-byte integer constant to VAList at offset
+  // OffsetBytes, and increment OffsetBytes by 4.
+  const auto PushIntConstant = [&](const int32_t Value) {
+    constexpr int IntSize = 4;
+    const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+    auto MIB =
+        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
+            .addDef(Temp)
+            .addImm(Value);
+    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+    const auto *MMO = *I.memoperands_begin();
+    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
+              .addUse(Temp)
+              .addUse(VAList)
+              .addImm(OffsetBytes / IntSize)
+              .addMemOperand(MF.getMachineMemOperand(
+                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
+                  MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
+    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+    OffsetBytes += IntSize;
+  };
+
+  // int gr_offs at offset 24 (12 on ILP32)
+  PushIntConstant(-static_cast<int32_t>(GPRSize));
+
+  // int vr_offs at offset 28 (16 on ILP32)
+  PushIntConstant(-static_cast<int32_t>(FPRSize));
+
+  assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
+
+  I.eraseFromParent();
+  return true;
 }
 
 bool AArch64InstructionSelector::selectVaStartDarwin(

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
new file mode 100644
index 00000000000000..437a9e6cb89ac3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64-unknown-linux -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK
+
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+  target triple = "aarch64-unknown-linux"
+
+  %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+  define i32 @va_start(ptr %a, ...) {
+  entry:
+    %ap = alloca %struct.__va_list, align 8
+    call void @llvm.lifetime.start.p0(i64 32, ptr %ap)
+    call void @llvm.va_start.p0(ptr %ap)
+    %vr_offs_p = getelementptr inbounds i8, ptr %ap, i64 28
+    %vr_offs = load i32, ptr %vr_offs_p, align 4
+    ret i32 %vr_offs
+  }
+...
+---
+name:            va_start
+alignment:       16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, size: 4, alignment: 16 }
+stack:
+  - { id: 0, size: 56, alignment: 8 }
+  - { id: 1, size: 128, alignment: 16 }
+  - { id: 2, name: ap, size: 32, alignment: 8 }
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: va_start
+    ; CHECK: LIFETIME_START %stack.2.ap
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.2.ap, 0, 0
+    ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: STRXui [[ADDXri1]], [[ADDXri]], 0 :: (store (s64) into %ir.ap)
+    ; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: STRXui [[ADDXri2]], [[ADDXri]], 1 :: (store (s64) into %ir.ap + 8)
+    ; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: STRXui [[ADDXri3]], [[ADDXri]], 2 :: (store (s64) into %ir.ap + 16)
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK-NEXT: STRWui [[MOVi32imm]], [[ADDXri]], 6 :: (store (s32) into %ir.ap + 24, align 8)
+    ; CHECK-NEXT: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK-NEXT: STRWui [[MOVi32imm1]], [[ADDXri]], 7 :: (store (s32) into %ir.ap + 28, basealign 8)
+    ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui %stack.2.ap, 7 :: (dereferenceable load (s32) from %ir.vr_offs_p)
+    ; CHECK-NEXT: $w0 = COPY [[LDRWui]]
+    LIFETIME_START %stack.2.ap
+    %0:gpr(p0) = G_FRAME_INDEX %stack.2.ap
+    G_VASTART %0(p0) :: (store (s256) into %ir.ap, align 8)
+    %1:gpr(s64) = G_CONSTANT i64 28
+    %2:gpr(p0) = G_PTR_ADD %0, %1(s64)
+    %3:gpr(s32) = G_LOAD %2(p0) :: (dereferenceable load (s32) from %ir.vr_offs_p)
+    $w0 = COPY %3(s32)
+...

diff  --git a/llvm/test/CodeGen/AArch64/vararg.ll b/llvm/test/CodeGen/AArch64/vararg.ll
new file mode 100644
index 00000000000000..291eee2ddf706d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vararg.ll
@@ -0,0 +1,384 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O0 -global-isel=0 -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -O0 -global-isel=1 -global-isel-abort=1 -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_end.p0(ptr)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define i64 @vararg(...) #0 {
+; CHECK-SD-LABEL: vararg:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #224
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 224
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #208] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    add x29, sp, #208
+; CHECK-SD-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -8
+; CHECK-SD-NEXT:    .cfi_offset w29, -16
+; CHECK-SD-NEXT:    str q7, [sp, #112]
+; CHECK-SD-NEXT:    str q6, [sp, #96]
+; CHECK-SD-NEXT:    str q5, [sp, #80]
+; CHECK-SD-NEXT:    str q4, [sp, #64]
+; CHECK-SD-NEXT:    str q3, [sp, #48]
+; CHECK-SD-NEXT:    str q2, [sp, #32]
+; CHECK-SD-NEXT:    str q1, [sp, #16]
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    stur x7, [x29, #-16]
+; CHECK-SD-NEXT:    stur x6, [x29, #-24]
+; CHECK-SD-NEXT:    stur x5, [x29, #-32]
+; CHECK-SD-NEXT:    stur x4, [x29, #-40]
+; CHECK-SD-NEXT:    stur x3, [x29, #-48]
+; CHECK-SD-NEXT:    stur x2, [x29, #-56]
+; CHECK-SD-NEXT:    stur x1, [x29, #-64]
+; CHECK-SD-NEXT:    stur x0, [x29, #-72]
+; CHECK-SD-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    str w8, [x29, #20]
+; CHECK-SD-NEXT:    mov w8, #-64 // =0xffffffc0
+; CHECK-SD-NEXT:    str w8, [x29, #16]
+; CHECK-SD-NEXT:    add x8, x29, #16
+; CHECK-SD-NEXT:    stur x8, [x29, #-8]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    add x8, x8, #128
+; CHECK-SD-NEXT:    str x8, [x29, #8]
+; CHECK-SD-NEXT:    sub x8, x29, #72
+; CHECK-SD-NEXT:    add x8, x8, #64
+; CHECK-SD-NEXT:    str x8, [x29]
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    mov w0, w8
+; CHECK-SD-NEXT:    .cfi_def_cfa wsp, 224
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #208] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #224
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT:    .cfi_restore w30
+; CHECK-SD-NEXT:    .cfi_restore w29
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vararg:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #224
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 224
+; CHECK-GI-NEXT:    stp x29, x30, [sp, #208] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    add x29, sp, #208
+; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -8
+; CHECK-GI-NEXT:    .cfi_offset w29, -16
+; CHECK-GI-NEXT:    stur x0, [x29, #-64]
+; CHECK-GI-NEXT:    stur x1, [x29, #-56]
+; CHECK-GI-NEXT:    stur x2, [x29, #-48]
+; CHECK-GI-NEXT:    stur x3, [x29, #-40]
+; CHECK-GI-NEXT:    stur x4, [x29, #-32]
+; CHECK-GI-NEXT:    stur x5, [x29, #-24]
+; CHECK-GI-NEXT:    stur x6, [x29, #-16]
+; CHECK-GI-NEXT:    stur x7, [x29, #-8]
+; CHECK-GI-NEXT:    str q0, [sp, #16]
+; CHECK-GI-NEXT:    str q1, [sp, #32]
+; CHECK-GI-NEXT:    str q2, [sp, #48]
+; CHECK-GI-NEXT:    str q3, [sp, #64]
+; CHECK-GI-NEXT:    str q4, [sp, #80]
+; CHECK-GI-NEXT:    str q5, [sp, #96]
+; CHECK-GI-NEXT:    str q6, [sp, #112]
+; CHECK-GI-NEXT:    str q7, [sp, #128]
+; CHECK-GI-NEXT:    add x9, sp, #8
+; CHECK-GI-NEXT:    add x8, x29, #16
+; CHECK-GI-NEXT:    str x8, [x9]
+; CHECK-GI-NEXT:    add x8, x29, #0
+; CHECK-GI-NEXT:    str x8, [x9, #8]
+; CHECK-GI-NEXT:    add x8, sp, #144
+; CHECK-GI-NEXT:    str x8, [x9, #16]
+; CHECK-GI-NEXT:    mov w8, #-64 // =0xffffffc0
+; CHECK-GI-NEXT:    str w8, [x9, #24]
+; CHECK-GI-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT:    str w8, [x9, #28]
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    mov w0, w8
+; CHECK-GI-NEXT:    .cfi_def_cfa wsp, 224
+; CHECK-GI-NEXT:    ldp x29, x30, [sp, #208] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #224
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT:    .cfi_restore w30
+; CHECK-GI-NEXT:    .cfi_restore w29
+; CHECK-GI-NEXT:    ret
+entry:
+  %g = alloca ptr, align 4
+  call void @llvm.va_start(ptr %g)
+  ret i64 1
+}
+
+define i64 @vararg_many_gpr(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) #0 {
+; CHECK-SD-LABEL: vararg_many_gpr:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #160
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #144] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    add x29, sp, #144
+; CHECK-SD-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -8
+; CHECK-SD-NEXT:    .cfi_offset w29, -16
+; CHECK-SD-NEXT:    str q7, [sp, #112]
+; CHECK-SD-NEXT:    str q6, [sp, #96]
+; CHECK-SD-NEXT:    str q5, [sp, #80]
+; CHECK-SD-NEXT:    str q4, [sp, #64]
+; CHECK-SD-NEXT:    str q3, [sp, #48]
+; CHECK-SD-NEXT:    str q2, [sp, #32]
+; CHECK-SD-NEXT:    str q1, [sp, #16]
+; CHECK-SD-NEXT:    str q0, [sp]
+; CHECK-SD-NEXT:    stur x7, [x29, #-16]
+; CHECK-SD-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-SD-NEXT:    str w8, [x29, #20]
+; CHECK-SD-NEXT:    mov w8, #-8 // =0xfffffff8
+; CHECK-SD-NEXT:    str w8, [x29, #16]
+; CHECK-SD-NEXT:    add x8, x29, #16
+; CHECK-SD-NEXT:    stur x8, [x29, #-8]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    add x8, x8, #128
+; CHECK-SD-NEXT:    str x8, [x29, #8]
+; CHECK-SD-NEXT:    sub x8, x29, #16
+; CHECK-SD-NEXT:    add x8, x8, #8
+; CHECK-SD-NEXT:    str x8, [x29]
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    mov w0, w8
+; CHECK-SD-NEXT:    .cfi_def_cfa wsp, 160
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #144] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #160
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT:    .cfi_restore w30
+; CHECK-SD-NEXT:    .cfi_restore w29
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vararg_many_gpr:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #176
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-GI-NEXT:    stp x29, x30, [sp, #160] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    add x29, sp, #160
+; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -8
+; CHECK-GI-NEXT:    .cfi_offset w29, -16
+; CHECK-GI-NEXT:    stur x7, [x29, #-8]
+; CHECK-GI-NEXT:    str q0, [sp, #16]
+; CHECK-GI-NEXT:    str q1, [sp, #32]
+; CHECK-GI-NEXT:    str q2, [sp, #48]
+; CHECK-GI-NEXT:    str q3, [sp, #64]
+; CHECK-GI-NEXT:    str q4, [sp, #80]
+; CHECK-GI-NEXT:    str q5, [sp, #96]
+; CHECK-GI-NEXT:    str q6, [sp, #112]
+; CHECK-GI-NEXT:    str q7, [sp, #128]
+; CHECK-GI-NEXT:    add x9, sp, #8
+; CHECK-GI-NEXT:    add x8, x29, #16
+; CHECK-GI-NEXT:    str x8, [x9]
+; CHECK-GI-NEXT:    add x8, x29, #0
+; CHECK-GI-NEXT:    str x8, [x9, #8]
+; CHECK-GI-NEXT:    add x8, sp, #144
+; CHECK-GI-NEXT:    str x8, [x9, #16]
+; CHECK-GI-NEXT:    mov w8, #-8 // =0xfffffff8
+; CHECK-GI-NEXT:    str w8, [x9, #24]
+; CHECK-GI-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-GI-NEXT:    str w8, [x9, #28]
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    mov w0, w8
+; CHECK-GI-NEXT:    .cfi_def_cfa wsp, 176
+; CHECK-GI-NEXT:    ldp x29, x30, [sp, #160] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #176
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT:    .cfi_restore w30
+; CHECK-GI-NEXT:    .cfi_restore w29
+; CHECK-GI-NEXT:    ret
+entry:
+  %g = alloca ptr, align 4
+  call void @llvm.va_start(ptr %g)
+  ret i64 1
+}
+
+define i64 @vararg_many_float(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, ...) #0 {
+; CHECK-SD-LABEL: vararg_many_float:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    add x29, sp, #96
+; CHECK-SD-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -8
+; CHECK-SD-NEXT:    .cfi_offset w29, -16
+; CHECK-SD-NEXT:    str q7, [sp]
+; CHECK-SD-NEXT:    str x7, [sp, #80]
+; CHECK-SD-NEXT:    str x6, [sp, #72]
+; CHECK-SD-NEXT:    str x5, [sp, #64]
+; CHECK-SD-NEXT:    str x4, [sp, #56]
+; CHECK-SD-NEXT:    str x3, [sp, #48]
+; CHECK-SD-NEXT:    str x2, [sp, #40]
+; CHECK-SD-NEXT:    str x1, [sp, #32]
+; CHECK-SD-NEXT:    str x0, [sp, #24]
+; CHECK-SD-NEXT:    mov w8, #-16 // =0xfffffff0
+; CHECK-SD-NEXT:    str w8, [x29, #20]
+; CHECK-SD-NEXT:    mov w8, #-64 // =0xffffffc0
+; CHECK-SD-NEXT:    str w8, [x29, #16]
+; CHECK-SD-NEXT:    add x8, x29, #16
+; CHECK-SD-NEXT:    stur x8, [x29, #-8]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    add x8, x8, #16
+; CHECK-SD-NEXT:    str x8, [x29, #8]
+; CHECK-SD-NEXT:    add x8, sp, #24
+; CHECK-SD-NEXT:    add x8, x8, #64
+; CHECK-SD-NEXT:    str x8, [x29]
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    mov w0, w8
+; CHECK-SD-NEXT:    .cfi_def_cfa wsp, 112
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT:    .cfi_restore w30
+; CHECK-SD-NEXT:    .cfi_restore w29
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vararg_many_float:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #112
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    add x29, sp, #96
+; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -8
+; CHECK-GI-NEXT:    .cfi_offset w29, -16
+; CHECK-GI-NEXT:    str x0, [sp, #32]
+; CHECK-GI-NEXT:    str x1, [sp, #40]
+; CHECK-GI-NEXT:    str x2, [sp, #48]
+; CHECK-GI-NEXT:    str x3, [sp, #56]
+; CHECK-GI-NEXT:    str x4, [sp, #64]
+; CHECK-GI-NEXT:    str x5, [sp, #72]
+; CHECK-GI-NEXT:    str x6, [sp, #80]
+; CHECK-GI-NEXT:    str x7, [sp, #88]
+; CHECK-GI-NEXT:    str q7, [sp, #16]
+; CHECK-GI-NEXT:    add x9, sp, #8
+; CHECK-GI-NEXT:    add x8, x29, #16
+; CHECK-GI-NEXT:    str x8, [x9]
+; CHECK-GI-NEXT:    add x8, sp, #96
+; CHECK-GI-NEXT:    str x8, [x9, #8]
+; CHECK-GI-NEXT:    add x8, sp, #32
+; CHECK-GI-NEXT:    str x8, [x9, #16]
+; CHECK-GI-NEXT:    mov w8, #-64 // =0xffffffc0
+; CHECK-GI-NEXT:    str w8, [x9, #24]
+; CHECK-GI-NEXT:    mov w8, #-16 // =0xfffffff0
+; CHECK-GI-NEXT:    str w8, [x9, #28]
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    mov w0, w8
+; CHECK-GI-NEXT:    .cfi_def_cfa wsp, 112
+; CHECK-GI-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #112
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT:    .cfi_restore w30
+; CHECK-GI-NEXT:    .cfi_restore w29
+; CHECK-GI-NEXT:    ret
+entry:
+  %g = alloca ptr, align 4
+  call void @llvm.va_start(ptr %g)
+  ret i64 1
+}
+
+define i64 @gpr1_fpr1(i32 %i, float %f, ...) #0 {
+; CHECK-SD-LABEL: gpr1_fpr1:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #192
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-SD-NEXT:    stp x29, x30, [sp, #176] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    add x29, sp, #176
+; CHECK-SD-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -8
+; CHECK-SD-NEXT:    .cfi_offset w29, -16
+; CHECK-SD-NEXT:    str q7, [sp, #96]
+; CHECK-SD-NEXT:    str q6, [sp, #80]
+; CHECK-SD-NEXT:    str q5, [sp, #64]
+; CHECK-SD-NEXT:    str q4, [sp, #48]
+; CHECK-SD-NEXT:    str q3, [sp, #32]
+; CHECK-SD-NEXT:    str q2, [sp, #16]
+; CHECK-SD-NEXT:    str q1, [sp]
+; CHECK-SD-NEXT:    stur x7, [x29, #-16]
+; CHECK-SD-NEXT:    stur x6, [x29, #-24]
+; CHECK-SD-NEXT:    stur x5, [x29, #-32]
+; CHECK-SD-NEXT:    stur x4, [x29, #-40]
+; CHECK-SD-NEXT:    stur x3, [x29, #-48]
+; CHECK-SD-NEXT:    stur x2, [x29, #-56]
+; CHECK-SD-NEXT:    stur x1, [x29, #-64]
+; CHECK-SD-NEXT:    mov w8, #-112 // =0xffffff90
+; CHECK-SD-NEXT:    str w8, [x29, #20]
+; CHECK-SD-NEXT:    mov w8, #-56 // =0xffffffc8
+; CHECK-SD-NEXT:    str w8, [x29, #16]
+; CHECK-SD-NEXT:    add x8, x29, #16
+; CHECK-SD-NEXT:    stur x8, [x29, #-8]
+; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    add x8, x8, #112
+; CHECK-SD-NEXT:    str x8, [x29, #8]
+; CHECK-SD-NEXT:    sub x8, x29, #64
+; CHECK-SD-NEXT:    add x8, x8, #56
+; CHECK-SD-NEXT:    str x8, [x29]
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    mov w0, w8
+; CHECK-SD-NEXT:    .cfi_def_cfa wsp, 192
+; CHECK-SD-NEXT:    ldp x29, x30, [sp, #176] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #192
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SD-NEXT:    .cfi_restore w30
+; CHECK-SD-NEXT:    .cfi_restore w29
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: gpr1_fpr1:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sub sp, sp, #208
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 208
+; CHECK-GI-NEXT:    stp x29, x30, [sp, #192] // 16-byte Folded Spill
+; CHECK-GI-NEXT:    add x29, sp, #192
+; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -8
+; CHECK-GI-NEXT:    .cfi_offset w29, -16
+; CHECK-GI-NEXT:    stur x1, [x29, #-56]
+; CHECK-GI-NEXT:    stur x2, [x29, #-48]
+; CHECK-GI-NEXT:    stur x3, [x29, #-40]
+; CHECK-GI-NEXT:    stur x4, [x29, #-32]
+; CHECK-GI-NEXT:    stur x5, [x29, #-24]
+; CHECK-GI-NEXT:    stur x6, [x29, #-16]
+; CHECK-GI-NEXT:    stur x7, [x29, #-8]
+; CHECK-GI-NEXT:    str q1, [sp, #16]
+; CHECK-GI-NEXT:    str q2, [sp, #32]
+; CHECK-GI-NEXT:    str q3, [sp, #48]
+; CHECK-GI-NEXT:    str q4, [sp, #64]
+; CHECK-GI-NEXT:    str q5, [sp, #80]
+; CHECK-GI-NEXT:    str q6, [sp, #96]
+; CHECK-GI-NEXT:    str q7, [sp, #112]
+; CHECK-GI-NEXT:    add x9, sp, #8
+; CHECK-GI-NEXT:    add x8, x29, #16
+; CHECK-GI-NEXT:    str x8, [x9]
+; CHECK-GI-NEXT:    add x8, x29, #0
+; CHECK-GI-NEXT:    str x8, [x9, #8]
+; CHECK-GI-NEXT:    add x8, sp, #128
+; CHECK-GI-NEXT:    str x8, [x9, #16]
+; CHECK-GI-NEXT:    mov w8, #-56 // =0xffffffc8
+; CHECK-GI-NEXT:    str w8, [x9, #24]
+; CHECK-GI-NEXT:    mov w8, #-112 // =0xffffff90
+; CHECK-GI-NEXT:    str w8, [x9, #28]
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    mov w0, w8
+; CHECK-GI-NEXT:    .cfi_def_cfa wsp, 208
+; CHECK-GI-NEXT:    ldp x29, x30, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    add sp, sp, #208
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-GI-NEXT:    .cfi_restore w30
+; CHECK-GI-NEXT:    .cfi_restore w29
+; CHECK-GI-NEXT:    ret
+entry:
+  %g = alloca ptr, align 4
+  call void @llvm.va_start(ptr %g)
+  ret i64 1
+}
+
+; To make the outputs more readable
+attributes #0 = { uwtable "frame-pointer"="all" }
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}


        


More information about the llvm-commits mailing list