[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 20:51:47 PDT 2024
https://github.com/JonChesterfield updated https://github.com/llvm/llvm-project/pull/92850
>From 20e754b13eb2cd5affa0d8bba46ac97286ec32c2 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield at gmail.com>
Date: Tue, 21 May 2024 04:22:45 +0100
Subject: [PATCH] [IPO] Optimise variadic functions
---
clang/test/CodeGen/aarch64-ABI-align-packed.c | 3 +-
clang/test/CodeGen/voidptr-vaarg.c | 478 +++++++
.../CodeGenCXX/inline-then-fold-variadics.cpp | 128 ++
llvm/include/llvm/InitializePasses.h | 1 +
.../llvm/Transforms/IPO/ExpandVariadics.h | 43 +
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassBuilderPipelines.cpp | 4 +
llvm/lib/Passes/PassRegistry.def | 1 +
llvm/lib/Transforms/IPO/CMakeLists.txt | 1 +
llvm/lib/Transforms/IPO/ExpandVariadics.cpp | 1243 +++++++++++++++++
.../expand-variadic-call-apcs64-linux.ll | 289 ++++
.../WebAssembly/expand-variadic-call.ll | 483 +++++++
llvm/test/CodeGen/WebAssembly/vararg-frame.ll | 525 +++++++
.../X86/expand-variadic-call-x64-linux.ll | 244 ++++
llvm/test/Other/new-pm-defaults.ll | 1 +
.../Other/new-pm-thinlto-postlink-defaults.ll | 1 +
.../new-pm-thinlto-postlink-pgo-defaults.ll | 1 +
...-pm-thinlto-postlink-samplepgo-defaults.ll | 2 +-
.../Other/new-pm-thinlto-prelink-defaults.ll | 1 +
.../new-pm-thinlto-prelink-pgo-defaults.ll | 1 +
...w-pm-thinlto-prelink-samplepgo-defaults.ll | 1 +
.../expand-va-intrinsic-split-linkage.ll | 239 ++++
.../expand-va-intrinsic-split-simple.ll | 212 +++
.../ExpandVariadics/indirect-calls.ll | 58 +
.../Transforms/ExpandVariadics/intrinsics.ll | 117 ++
.../Transforms/ExpandVariadics/pass-byval.ll | 81 ++
.../ExpandVariadics/pass-integers.ll | 344 +++++
.../llvm/lib/Transforms/IPO/BUILD.gn | 1 +
28 files changed, 4502 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGen/voidptr-vaarg.c
create mode 100644 clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
create mode 100644 llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
create mode 100644 llvm/lib/Transforms/IPO/ExpandVariadics.cpp
create mode 100644 llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/vararg-frame.ll
create mode 100644 llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/intrinsics.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/pass-byval.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/pass-integers.ll
diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c
index 0349ebc8cc639..9ce051369f390 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c
@@ -1,5 +1,6 @@
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - %s -mllvm -expand-variadics-override=disable | FileCheck %s
+
#include <stdarg.h>
#include <arm_neon.h>
diff --git a/clang/test/CodeGen/voidptr-vaarg.c b/clang/test/CodeGen/voidptr-vaarg.c
new file mode 100644
index 0000000000000..d023ddf0fb5d2
--- /dev/null
+++ b/clang/test/CodeGen/voidptr-vaarg.c
@@ -0,0 +1,478 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Multiple targets use emitVoidPtrVAArg to lower va_arg instructions in clang.
+// PPC is complicated, so it is excluded from this case analysis.
+// ForceRightAdjust is false for all non-PPC targets.
+// AllowHigherAlign is only false for two Microsoft targets, both of which
+// pass most things by reference.
+//
+// Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
+// QualType ValueTy, bool IsIndirect,
+// TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign,
+// bool AllowHigherAlign, bool ForceRightAdjust =
+// false);
+//
+// Target IsIndirect SlotSize AllowHigher ForceRightAdjust
+// ARC false four true false
+// ARM varies four true false
+// Mips false 4 or 8 true false
+// RISCV varies register true false
+// PPC elided
+// LoongArch varies register true false
+// NVPTX WIP
+// AMDGPU WIP
+// X86_32 false four true false
+// X86_64 MS varies eight false false
+// CSKY false four true false
+// Webassembly varies four true false
+// AArch64 false eight true false
+// AArch64 MS false eight false false
+//
+// WebAssembly passes indirectly iff it's an aggregate of multiple values.
+// It is chosen as a representative architecture to check IR generation
+// partly because it has a relatively simple variadic calling convention.
+
+// Int, by itself and packed in structs
+// CHECK-LABEL: @raw_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int raw_int(__builtin_va_list list) { return __builtin_va_arg(list, int); }
+
+typedef struct {
+ int x;
+} one_int_t;
+
+// CHECK-LABEL: @one_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_INT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+one_int_t one_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_int_t);
+}
+
+typedef struct {
+ int x;
+ int y;
+} two_int_t;
+
+// CHECK-LABEL: @two_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT:%.*]], ptr align 4 [[TMP0]], i32 8, i1 false)
+// CHECK-NEXT: ret void
+//
+two_int_t two_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_int_t);
+}
+
+// Double, by itself and packed in structs
+// CHECK-LABEL: @raw_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+double raw_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, double);
+}
+
+typedef struct {
+ double x;
+} one_double_t;
+
+// CHECK-LABEL: @one_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_DOUBLE_T:%.*]], align 8
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[RETVAL]], ptr align 8 [[ARGP_CUR_ALIGNED]], i32 8, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_DOUBLE_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+one_double_t one_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_double_t);
+}
+
+typedef struct {
+ double x;
+ double y;
+} two_double_t;
+
+// CHECK-LABEL: @two_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[AGG_RESULT:%.*]], ptr align 8 [[TMP0]], i32 16, i1 false)
+// CHECK-NEXT: ret void
+//
+two_double_t two_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_double_t);
+}
+
+// Scalars smaller than the slot size, wrapped in structs (C would promote a bare short to int)
+typedef struct {
+ char x;
+} one_char_t;
+
+// CHECK-LABEL: @one_char(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_CHAR_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_CHAR_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+one_char_t one_char(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_char_t);
+}
+
+typedef struct {
+ short x;
+} one_short_t;
+
+// CHECK-LABEL: @one_short(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_SHORT_T:%.*]], align 2
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 2, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_SHORT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[COERCE_DIVE]], align 2
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+one_short_t one_short(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_short_t);
+}
+
+// Composite smaller than the slot size
+typedef struct {
+ _Alignas(2) char x;
+ char y;
+} char_pair_t;
+
+// CHECK-LABEL: @char_pair(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT:%.*]], ptr align 2 [[TMP0]], i32 2, i1 false)
+// CHECK-NEXT: ret void
+//
+char_pair_t char_pair(__builtin_va_list list) {
+ return __builtin_va_arg(list, char_pair_t);
+}
+
+// Empty struct
+typedef struct {
+} empty_t;
+
+// CHECK-LABEL: @empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 0, i1 false)
+// CHECK-NEXT: ret void
+//
+empty_t empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_t);
+}
+
+typedef struct {
+ empty_t x;
+ int y;
+} empty_int_t;
+
+// CHECK-LABEL: @empty_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+empty_int_t empty_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_int_t);
+}
+
+typedef struct {
+ int x;
+ empty_t y;
+} int_empty_t;
+
+// CHECK-LABEL: @int_empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT_EMPTY_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT_EMPTY_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int_empty_t int_empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, int_empty_t);
+}
+
+// Need multiple va_arg instructions to check the postincrement.
+// Using types that are passed directly, as the indirect handling
+// is independent of the alignment handling in emitVoidPtrDirectVAArg.
+
+// CHECK-LABEL: @multiple_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT: ret void
+//
+void multiple_int(__builtin_va_list list, int *out0, int *out1, int *out2) {
+ *out0 = __builtin_va_arg(list, int);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, int);
+}
+
+// Scalars in structs are an easy way of specifying alignment from C
+// CHECK-LABEL: @increasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP0]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP1]], ptr align 4 [[ARGP_CUR1]], i32 2, i1 false)
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 7
+// CHECK-NEXT: [[ARGP_CUR5_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP4]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARGP_CUR5_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT: ret void
+//
+void increasing_alignment(__builtin_va_list list, one_char_t *out0,
+ one_short_t *out1, int *out2, double *out3) {
+ *out0 = __builtin_va_arg(list, one_char_t);
+ *out1 = __builtin_va_arg(list, one_short_t);
+ *out2 = __builtin_va_arg(list, int);
+ *out3 = __builtin_va_arg(list, double);
+}
+
+// CHECK-LABEL: @decreasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP1]], ptr [[TMP2]], align 8
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP5]], ptr align 4 [[ARGP_CUR3]], i32 2, i1 false)
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP6]], ptr align 4 [[ARGP_CUR5]], i32 1, i1 false)
+// CHECK-NEXT: ret void
+//
+void decreasing_alignment(__builtin_va_list list, double *out0, int *out1,
+ one_short_t *out2, one_char_t *out3) {
+ *out0 = __builtin_va_arg(list, double);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, one_short_t);
+ *out3 = __builtin_va_arg(list, one_char_t);
+}
+
+// Typical edge cases; none of them hit special handling in VAArg lowering.
+typedef struct {
+ int x[16];
+ double y[8];
+} large_value_t;
+
+// CHECK-LABEL: @large_value(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 8 [[TMP1]], i32 128, i1 false)
+// CHECK-NEXT: ret void
+//
+void large_value(__builtin_va_list list, large_value_t *out) {
+ *out = __builtin_va_arg(list, large_value_t);
+}
+
+typedef int v128_t __attribute__((__vector_size__(16), __aligned__(16)));
+// CHECK-LABEL: @vector(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -16)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 16
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARGP_CUR_ALIGNED]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+// CHECK-NEXT: ret void
+//
+void vector(__builtin_va_list list, v128_t *out) {
+ *out = __builtin_va_arg(list, v128_t);
+}
+
+typedef struct BF {
+ float not_an_i32[2];
+ int A : 1;
+ char B;
+ int C : 13;
+} BF;
+
+// CHECK-LABEL: @bitfield(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[TMP1]], i32 12, i1 false)
+// CHECK-NEXT: ret void
+//
+void bitfield(__builtin_va_list list, BF *out) {
+ *out = __builtin_va_arg(list, BF);
+}
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
new file mode 100644
index 0000000000000..cdc850a5b9d06
--- /dev/null
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -0,0 +1,128 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+
+// Simple calls to known variadic functions that are completely elided when optimisations are on.
+// This is a functional check that the ExpandVariadics pass is consistent with clang's va_arg handling.
+
+// -Wno-varargs suppresses the warning that the second argument to 'va_start' is not the last named parameter
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -Wno-varargs -O1 -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -Wno-varargs -O1 -emit-llvm -o - %s | FileCheck %s
+
+// x64 needs O2 to remove the extra SROA layer
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Wno-varargs -O2 -emit-llvm -o - %s | FileCheck %s
+
+
+#include <stdarg.h>
+#include <stdint.h>
+
+template <typename X, typename Y>
+static X first(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ X r = va_arg(va, X);
+ va_end(va);
+ return r;
+}
+
+template <typename X, typename Y>
+static Y second(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ va_arg(va, X);
+ Y r = va_arg(va, Y);
+ va_end(va);
+ return r;
+}
+
+
+extern "C"
+{
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_pair_i32(int x, int y)
+{
+ return first<int,int>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
+// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_pair_i32(int x, int y)
+{
+ return second<int,int>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
+// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_pair_f64(double x, double y)
+{
+ return first<double,double>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
+// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_pair_f64(double x, double y)
+{
+ return second<double,double>(x, y);
+}
+
+}
+
+
+
+extern "C"
+{
+// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_f64(int x, double y)
+{
+ return first<int,double>(x, y);
+}
+
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
+// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_i32_f64(int x, double y)
+{
+ return second<int,double>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
+// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_f64_i32(double x, int y)
+{
+ return first<double,int>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
+// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_f64_i32(double x, int y)
+{
+ return second<double,int>(x, y);
+}
+}
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 9ba75d491c1c9..5da681781da97 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
+void initializeExpandVariadicsPass(PassRegistry &);
void initializeExpandVectorPredicationPass(PassRegistry &);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
new file mode 100644
index 0000000000000..f7c9618bad79d
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -0,0 +1,43 @@
+//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+class ModulePass;
+class OptimizationLevel;
+
+// Selects what the variadic expansion pass does. The pass is constructed
+// with one of these values.
+enum class ExpandVariadicsMode {
+ Unspecified, // Use the implementation defaults
+ Disable, // Disable the pass entirely
+ Optimize, // Optimise without changing ABI
+ Lowering, // Change variadic calling convention
+};
+
+// Module pass wrapper for the variadic function expansion. The mode is fixed
+// at construction time and stored in Mode.
+class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
+ // Mode this instance was constructed with; never changes afterwards.
+ const ExpandVariadicsMode Mode;
+
+public:
+ // Operates under passed mode unless overridden on commandline
+ ExpandVariadicsPass(ExpandVariadicsMode Mode);
+
+ // Chooses disable or optimize based on optimization level
+ ExpandVariadicsPass(OptimizationLevel Level);
+
+ // Runs the transform over the whole module M.
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+ModulePass *createExpandVariadicsPass(ExpandVariadicsMode);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 734ca4d5deec9..b1448cc17cf39 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -137,6 +137,7 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1892e16a06528..f12300f2b2c9b 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -48,6 +48,7 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -1195,6 +1196,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (EnablePGOForceFunctionAttrs && PGOOpt)
MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
+ // ExpandVariadics interacts well with the function inliner.
+ MPM.addPass(ExpandVariadicsPass(Level));
+
MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
if (EnableModuleInliner)
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f1..e31fd685abcfe 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
MODULE_PASS("dxil-upgrade", DXILUpgradePass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
+MODULE_PASS("expand-variadics", ExpandVariadicsPass(OptimizationLevel::O0))
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globalopt", GlobalOptPass())
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 5fbdbc3a014f9..92a9697720efd 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
DeadArgumentElimination.cpp
ElimAvailExtern.cpp
EmbedBitcodePass.cpp
+ ExpandVariadics.cpp
ExtractGV.cpp
ForceFunctionAttrs.cpp
FunctionAttrs.cpp
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
new file mode 100644
index 0000000000000..e27c391d88146
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -0,0 +1,1243 @@
+//===- ExpandVariadics.cpp - expand variadic functions ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The strategy is to turn the ... part of a variadic function into a va_list
+// and fix up the call sites. The majority of the pass is target independent.
+// The exceptions are the va_list type itself and the rules for where to store
+// variables in memory such that va_arg can iterate over them given a va_list.
+//
+// The majority of the plumbing is splitting the variadic function into a
+// single basic block that packs the variadic arguments into a va_list and
+// a second function that does the work of the original. That packing is
+// exactly what is done by va_start. Further, the transform from ... to va_list
+// replaces va_start with an operation to copy a va_list from the new argument,
+// which is exactly a va_copy. This is useful for reducing target-dependence.
+//
+// A va_list instance is a forward iterator, where the primary operation va_arg
+// is dereference-then-increment. This interface forces significant convergent
+// evolution between target specific implementations. The variation in runtime
+// data layout is limited to that representable by the iterator, parameterised
+// by the type passed to the va_arg instruction.
+//
+// Therefore the majority of the target specific subtlety is packing arguments
+// into a stack allocated buffer such that a va_list can be initialised with it
+// and the va_arg expansion for the target will find the arguments at runtime.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include <cstdio>
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+// Command line override for the mode requested when the pass is constructed.
+// The default, "unspecified", leaves the requested mode in effect. The option
+// is only referenced from this file, so it is given internal linkage.
+static cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
+    DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
+    cl::init(ExpandVariadicsMode::Unspecified),
+    cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
+                          "Use the implementation defaults"),
+               clEnumValN(ExpandVariadicsMode::Disable, "disable",
+                          "Disable the pass entirely"),
+               clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
+                          "Optimise without changing ABI"),
+               clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
+                          "Change variadic calling convention")));
+
+namespace {
+
+// Instances of this class encapsulate the target-dependent behaviour as a
+// function of triple. Implementing a new ABI is adding a case to the switch
+// in create(llvm::Triple) at the end of this file.
+class VariadicABIInfo {
+protected:
+  VariadicABIInfo() = default;
+
+public:
+  // Returns the ABI description for Triple. Callers in this file check the
+  // result for null before using it.
+  static std::unique_ptr<VariadicABIInfo> create(llvm::Triple const &Triple);
+
+  // Whether a valist instance is passed by value or by address
+  // I.e. does it need to be alloca'ed and stored into, or can
+  // it be passed directly in a SSA register
+  virtual bool vaListPassedInSSARegister() = 0;
+
+  // The type of a va_list iterator object
+  virtual Type *vaListType(LLVMContext &Ctx) = 0;
+
+  // The type of a va_list as a function argument as lowered by C
+  virtual Type *vaListParameterType(Module &M) = 0;
+
+  // Initialize an allocated va_list object to point to an already
+  // initialized contiguous memory region.
+  // Return the value to pass as the va_list argument
+  virtual Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                                  AllocaInst *, Value * /*buffer*/) = 0;
+
+  // Describes how one variadic argument of a given type is placed in the
+  // call frame. Field order is part of the interface: do not reorder.
+  struct VAArgSlotInfo {
+    // The type is spelled with its namespace because the member shares its
+    // name: an unqualified `Align Align;` changes the meaning of `Align`
+    // inside this struct, which GCC diagnoses as an error.
+    llvm::Align Align; // With respect to the call frame
+    bool Indirect;     // Passed via a pointer
+    bool Unknown;      // Cannot analyse this type, cannot transform the call
+  };
+  virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
+
+  // Targets implemented so far all have the same trivial lowering for these
+  bool vaEndIsNop() { return true; }
+  bool vaCopyIsMemcpy() { return true; }
+
+  virtual ~VariadicABIInfo() = default;
+};
+
+// Look up the declaration of an intrinsic without creating it; returns
+// whatever Module::getFunction returns for the intrinsic's name.
+//
+// Module offers getFunction(), which returns nullptr for a missing
+// declaration, and getOrInsertFunction(), which creates one when absent.
+// Intrinsics.h only offers getDeclaration, which creates the declaration when
+// it is missing, so testing for an intrinsic that way inserts it into the
+// module and it then has to be deleted again to clean up.
+// Ideally the intrinsic functions would be named to match Module::, but
+// making that change in one step would introduce nullptr dereferences where
+// currently there are none. The minimal collateral damage approach would
+// split the change over a release to help downstream branches. As it is
+// unclear which approach will be preferred, this trivial helper lives here in
+// the meantime to decouple from that discussion.
+Function *getPreexistingDeclaration(Module *M, Intrinsic::ID id,
+                                    ArrayRef<Type *> Tys = std::nullopt) {
+  FunctionType *Signature = Intrinsic::getType(M->getContext(), id, Tys);
+
+  // Without overload types the plain intrinsic name is used
+  if (Tys.empty())
+    return M->getFunction(Intrinsic::getName(id));
+
+  return M->getFunction(Intrinsic::getName(id, Tys, M, Signature));
+}
+
+class ExpandVariadics : public ModulePass {
+
+ // The pass construction sets the default to optimize when called from middle
+ // end and lowering when called from the backend. The command line variable
+ // overrides that. This is useful for testing and debugging. It also allows
+ // building an application with variadic functions wholly removed if one
+ // has sufficient control over the dependencies, e.g. a statically linked
+ // clang that has no variadic function calls remaining in the binary.
+ //
+ // Returns the command line value unless it is Unspecified, in which case
+ // LLVMRequested is returned unchanged.
+ static ExpandVariadicsMode
+ withCommandLineOverride(ExpandVariadicsMode LLVMRequested) {
+   const ExpandVariadicsMode FromCommandLine = ExpandVariadicsModeOption;
+   if (FromCommandLine != ExpandVariadicsMode::Unspecified)
+     return FromCommandLine;
+   return LLVMRequested;
+ }
+
+public:
+ static char ID;
+ const ExpandVariadicsMode Mode;
+ std::unique_ptr<VariadicABIInfo> ABI;
+
+ ExpandVariadics(ExpandVariadicsMode Mode)
+ : ModulePass(ID), Mode(withCommandLineOverride(Mode)) {}
+ StringRef getPassName() const override { return "Expand variadic functions"; }
+
+ // Rewrite a variadic call site
+ bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
+ Function *NF);
+
+ Function *replaceAllUsesWithNewDeclaration(Module &M,
+ Function *OriginalFunction);
+ Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction);
+ Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+ Function *VariadicWrapper,
+ Function *FixedArityReplacement);
+
+ bool runOnModule(Module &M) override;
+ bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
+
+ bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
+
+ // Emits an inline memcpy from Src to Dst covering one object of the ABI's
+ // va_list type, sized by its alloc size under DL. No alignment is supplied
+ // for either pointer and the length is an i32 constant.
+ void memcpyVAListPointers(const DataLayout &DL, IRBuilder<> &Builder,
+                           Value *Dst, Value *Src) {
+   LLVMContext &Ctx = Builder.getContext();
+   const uint64_t Bytes =
+       DL.getTypeAllocSize(ABI->vaListType(Ctx)).getFixedValue();
+   ConstantInt *Length = ConstantInt::get(Type::getInt32Ty(Ctx), Bytes);
+   Builder.CreateMemCpyInline(Dst, {}, Src, {}, Length);
+ }
+
+ // Expands every user of the intrinsic ID (overloaded on ArgType) that is an
+ // InstructionType, using the matching expandVAIntrinsicCall overload. If
+ // the declaration has no uses left afterwards it is erased from the module.
+ // Returns true if any expansion reported a change; does nothing when the
+ // module has no declaration of the intrinsic.
+ template <Intrinsic::ID ID, typename InstructionType>
+ bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
+                           PointerType *ArgType) {
+   Function *Decl = getPreexistingDeclaration(&M, ID, {ArgType});
+   if (!Decl)
+     return false;
+
+   const DataLayout &DL = M.getDataLayout();
+   bool Changed = false;
+
+   // Early-inc iteration, as in the original, so the user list may shrink
+   // while it is being walked
+   for (User *U : llvm::make_early_inc_range(Decl->users()))
+     if (auto *Inst = dyn_cast<InstructionType>(U))
+       Changed |= expandVAIntrinsicCall(Builder, DL, Inst);
+
+   if (Decl->use_empty())
+     Decl->eraseFromParent();
+
+   return Changed;
+ }
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VAStartInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VACopyInst *Inst);
+
+ // Returns the type of "FTy" with the ... removed and a va_list appended:
+ // same return type, same fixed parameters, then the ABI's va_list
+ // parameter type, and not variadic.
+ FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
+   SmallVector<Type *> Params;
+   Params.append(FTy->param_begin(), FTy->param_end());
+   Params.push_back(ABI->vaListParameterType(M));
+   return FunctionType::get(FTy->getReturnType(), Params,
+                            /*isVarArg=*/false);
+ }
+
+ // Returns the alloc size under DL of the type allocated by Alloced, as an
+ // i64 constant.
+ static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
+                                  AllocaInst *Alloced) {
+   const uint64_t Bytes =
+       DL.getTypeAllocSize(Alloced->getAllocatedType()).getFixedValue();
+   return ConstantInt::get(Type::getInt64Ty(Ctx), Bytes);
+ }
+
+ // Decides whether F is a variadic function this pass may split. Intrinsics,
+ // non-variadic functions, naked functions and anything not using the C
+ // calling convention are rejected, as is any function that contains a call
+ // to the branch funnel intrinsic. When not rewriting the ABI, interposable
+ // functions are rejected too, and every use of F must be a non-musttail
+ // direct call whose function type matches F's.
+ bool expansionApplicableToFunction(Module &M, Function *F) {
+   if (F->isIntrinsic())
+     return false;
+   if (!F->isVarArg())
+     return false;
+   if (F->hasFnAttribute(Attribute::Naked))
+     return false;
+   if (F->getCallingConv() != CallingConv::C)
+     return false;
+
+   if (!rewriteABI()) {
+     // e.g. can't replace a weak function unless changing the original symbol
+     if (GlobalValue::isInterposableLinkage(F->getLinkage()))
+       return false;
+
+     // If optimising, err on the side of leaving things alone
+     for (const Use &U : F->uses()) {
+       const auto *Call = dyn_cast<CallBase>(U.getUser());
+       if (!Call || Call->isMustTailCall())
+         return false;
+
+       const bool DirectCallOfSameType =
+           Call->isCallee(&U) &&
+           Call->getFunctionType() == F->getFunctionType();
+       if (!DirectCallOfSameType)
+         return false;
+     }
+   }
+
+   // Branch funnels look like variadic functions but aren't:
+   //
+   // define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) {
+   //   musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr @vt1_1,
+   //   ptr @vf1_1, ...) ret void
+   // }
+   //
+   // %1 = call i32 @__typeid_typeid1_0_branch_funnel(ptr nest %vtable, ptr
+   // %obj, i32 1)
+   //
+   // If this function contains a branch funnel intrinsic, don't transform it.
+   Function *Funnel =
+       getPreexistingDeclaration(&M, Intrinsic::icall_branch_funnel);
+   if (!Funnel)
+     return true;
+
+   for (const User *U : Funnel->users()) {
+     const auto *Call = dyn_cast<CallBase>(U);
+     if (Call && F == Call->getFunction())
+       return false;
+   }
+
+   return true;
+ }
+
+ // Whether the call site CB can be rewritten by this pass. Only plain call
+ // instructions that are not musttail qualify.
+ bool callinstRewritable(CallBase *CB) {
+   // Invoke is not implemented in the initial implementation of the pass,
+   // and neither is any other derivative of CallBase
+   auto *CI = dyn_cast<CallInst>(CB);
+   if (!CI)
+     return false;
+
+   // Cannot expand musttail calls
+   return !CI->isMustTailCall();
+ }
+
+ // Helper for constructing an alloca instance containing the arguments bound
+ // to the variadic ... parameter, rearranged to allow indexing through a
+ // va_list iterator. Fields are recorded in order together with how each one
+ // is to be initialised; asStruct() turns the record into a packed struct
+ // type and initializeStructAlloca() emits the initialisation.
+ class ExpandedCallFrame {
+   enum { N = 4 };
+   enum Tag { Store, Memcpy, Padding };
+
+   // Parallel arrays: FieldTypes[I] is the struct field type, Source[I] the
+   // value, byte count (memcpy only) and kind used to fill it in
+   SmallVector<Type *, N> FieldTypes;
+   SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
+
+   void append(Tag Kind, Type *FieldType, Value *V, uint64_t Bytes) {
+     FieldTypes.push_back(FieldType);
+     Source.emplace_back(V, Bytes, Kind);
+   }
+
+ public:
+   // Field of type T initialised by storing V
+   void store(LLVMContext &Ctx, Type *T, Value *V) {
+     append(Store, T, V, 0);
+   }
+
+   // Field of type T initialised by copying Bytes bytes from the pointer V
+   void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
+     append(Memcpy, T, V, Bytes);
+   }
+
+   // By bytes of i8 array that are never written
+   void padding(LLVMContext &Ctx, uint64_t By) {
+     append(Padding, ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
+   }
+
+   size_t size() const { return FieldTypes.size(); }
+   bool empty() const { return FieldTypes.empty(); }
+
+   // Creates a packed struct type named "<Name>.vararg" from the fields
+   // recorded so far
+   StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
+     return StructType::create(Ctx, FieldTypes,
+                               (Twine(Name) + ".vararg").str(),
+                               /*isPacked=*/true);
+   }
+
+   // Emits, at Builder's insertion point, one store or memcpy per
+   // non-padding field of Alloced, whose allocated type must be a struct.
+   void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
+                               AllocaInst *Alloced) {
+     auto *FrameTy = cast<StructType>(Alloced->getAllocatedType());
+
+     for (size_t Idx = 0, End = size(); Idx != End; ++Idx) {
+       auto [Val, Bytes, Kind] = Source[Idx];
+
+       if (Kind == Padding) {
+         assert(Val == nullptr);
+         continue;
+       }
+
+       Value *FieldPtr = Builder.CreateStructGEP(FrameTy, Alloced, Idx);
+       assert(Val != nullptr);
+
+       // Padding was handled above, so anything not a Store is a Memcpy
+       if (Kind == Store)
+         Builder.CreateStore(Val, FieldPtr);
+       else
+         Builder.CreateMemCpy(FieldPtr, {}, Val, {}, Bytes);
+     }
+   }
+ };
+};
+
+// Runs the transform over module M and returns whether anything was reported
+// as changed. Nothing is done when the mode is Disable, when the triple's
+// architecture is unknown, or when no VariadicABIInfo exists for the triple;
+// in that last case a requested Lowering is a fatal error.
+bool ExpandVariadics::runOnModule(Module &M) {
+  bool Changed = false;
+
+  if (Mode == ExpandVariadicsMode::Disable)
+    return Changed;
+
+  llvm::Triple Triple(M.getTargetTriple());
+
+  // If we don't know the triple, we can't lower varargs
+  if (Triple.getArch() == Triple::UnknownArch)
+    return Changed;
+
+  ABI = VariadicABIInfo::create(Triple);
+  if (!ABI) {
+    if (Mode == ExpandVariadicsMode::Lowering) {
+      report_fatal_error(
+          "Requested variadic lowering is unimplemented on this target");
+    }
+    return Changed;
+  }
+
+  auto &Ctx = M.getContext();
+  IRBuilder<> Builder(Ctx);
+
+  // At pass input, va_start intrinsics only occur in variadic functions, as
+  // checked by the IR verifier.
+
+  // The lowering pass needs to run on all variadic functions.
+  // The optimise could run on only those that call va_start
+  // in exchange for additional book keeping to avoid transforming
+  // the same function multiple times when it contains multiple va_start.
+  // Leaving that compile time optimisation for a later patch.
+
+  for (Function &F : llvm::make_early_inc_range(M))
+    Changed |= runOnFunction(M, Builder, &F);
+
+  // After runOnFunction, all known calls to known variadic functions have been
+  // replaced. va_start intrinsics are presently (and invalidly!) only present
+  // in functions that used to be variadic and have now been replaced to take a
+  // va_list instead. If lowering as opposed to optimising, calls to unknown
+  // variadic functions have also been replaced.
+
+  unsigned Addrspace = 0; // Sufficient for current targets
+  {
+    PointerType *ArgType = PointerType::get(Ctx, Addrspace);
+    // expand vastart before vacopy as vastart may introduce a vacopy
+    Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(M, Builder,
+                                                                     ArgType);
+    Changed |=
+        expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(M, Builder, ArgType);
+    Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(M, Builder,
+                                                                   ArgType);
+  }
+
+  if (Mode != ExpandVariadicsMode::Lowering)
+    return Changed; // Done
+
+  // Now need to track down indirect calls. Can't find those
+  // by walking uses of variadic functions, need to crawl the instruction
+  // stream. Fortunately this is only necessary for the ABI rewrite case.
+  for (Function &F : llvm::make_early_inc_range(M)) {
+    if (F.isDeclaration())
+      continue;
+
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : llvm::make_early_inc_range(BB)) {
+        auto *CB = dyn_cast<CallBase>(&I);
+        if (!CB || !CB->isIndirectCall())
+          continue;
+
+        FunctionType *FTy = CB->getFunctionType();
+        if (!FTy->isVarArg())
+          continue;
+
+        // There is no known callee for an indirect call, hence no
+        // replacement function to pass
+        Changed |= expandCall(M, Builder, CB, FTy, /*NF=*/nullptr);
+      }
+    }
+  }
+
+  return Changed;
+}
+
+// Splits one variadic function into a variadic wrapper and a fixed arity
+// function taking a va_list, then rewrites the known direct calls to use the
+// latter. Returns false without touching the module when the function is not
+// eligible, or is a declaration in Optimize mode; returns true otherwise,
+// because the split itself modifies the module even if no call is expanded.
+bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
+                                    Function *OriginalFunction) {
+  // TODO: Check what F.hasExactDefinition() does
+
+  // This check might be too coarse - there are probably cases where
+  // splitting a function is bad but it's usable without splitting
+  if (!expansionApplicableToFunction(M, OriginalFunction))
+    return false;
+
+  // TODO: Leave "thunk" attribute functions alone?
+
+  // Need more tests than this. Weak etc. Some are in expansionApplicable.
+
+  if (OriginalFunction->isDeclaration() &&
+      Mode == ExpandVariadicsMode::Optimize)
+    return false;
+
+  // Only read by the assertions below, hence maybe_unused for NDEBUG builds
+  [[maybe_unused]] const bool OriginalFunctionIsDeclaration =
+      OriginalFunction->isDeclaration();
+
+  // From here on the module is modified (functions are created, renamed and
+  // erased) whether or not any call site ends up being expanded.
+  bool Changed = true;
+
+  // Declare a new function and redirect every use to that new function
+  Function *VariadicWrapper =
+      replaceAllUsesWithNewDeclaration(M, OriginalFunction);
+  assert(VariadicWrapper->isDeclaration());
+  assert(OriginalFunction->use_empty());
+
+  // Create a new function taking va_list containing the implementation of the
+  // original
+  Function *FixedArityReplacement =
+      deriveFixedArityReplacement(M, Builder, OriginalFunction);
+  assert(OriginalFunction->isDeclaration());
+  assert(FixedArityReplacement->isDeclaration() ==
+         OriginalFunctionIsDeclaration);
+  assert(VariadicWrapper->isDeclaration());
+
+  // Create a single block forwarding wrapper that turns a ... into a va_list
+  [[maybe_unused]] Function *VariadicWrapperDefine =
+      defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
+  assert(VariadicWrapperDefine == VariadicWrapper);
+  assert(!VariadicWrapper->isDeclaration());
+
+  // We now have:
+  // 1. the original function, now as a declaration with no uses
+  // 2. a variadic function that unconditionally calls a fixed arity replacement
+  // 3. a fixed arity function equivalent to the original function
+
+  // Replace known calls to the variadic with calls to the va_list equivalent
+  for (User *U : llvm::make_early_inc_range(VariadicWrapper->users())) {
+    if (CallBase *CB = dyn_cast<CallBase>(U)) {
+      Value *CalledOperand = CB->getCalledOperand();
+      if (VariadicWrapper == CalledOperand) {
+        Changed |=
+            expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
+                       FixedArityReplacement);
+      }
+    }
+  }
+
+  // One of the two new functions takes over the original's name, linkage,
+  // visibility and comdat; the other becomes internal. Which is which
+  // depends on whether the ABI is being rewritten.
+  Function *const ExternallyAccessible =
+      rewriteABI() ? FixedArityReplacement : VariadicWrapper;
+  Function *const InternalOnly =
+      rewriteABI() ? VariadicWrapper : FixedArityReplacement;
+
+  // care needed over other attributes, metadata etc
+
+  ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
+  ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
+  ExternallyAccessible->setComdat(OriginalFunction->getComdat());
+  ExternallyAccessible->takeName(OriginalFunction);
+
+  InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
+  InternalOnly->setLinkage(GlobalValue::InternalLinkage);
+
+  OriginalFunction->eraseFromParent();
+
+  InternalOnly->removeDeadConstantUsers();
+
+  if (rewriteABI()) {
+    // All known calls to the function have been removed by expandCall
+    // Resolve everything else by replace all uses with
+
+    VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
+
+    assert(VariadicWrapper->use_empty());
+    VariadicWrapper->eraseFromParent();
+  }
+
+  return Changed;
+}
+
+// Creates a declaration named "<original>.varargs" with the original's
+// function type, linkage and address space, inserts it into the module
+// immediately before the original, and redirects every use of the original
+// to it. Returns the new declaration.
+Function *
+ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
+                                                  Function *OriginalFunction) {
+  LLVMContext &Ctx = M.getContext();
+  FunctionType *FTy = OriginalFunction->getFunctionType();
+
+  Function *Decl = Function::Create(FTy, OriginalFunction->getLinkage(),
+                                    OriginalFunction->getAddressSpace());
+  Decl->setName(OriginalFunction->getName() + ".varargs");
+  Decl->IsNewDbgInfoFormat = OriginalFunction->IsNewDbgInfoFormat;
+
+  // Could give it the same visibility/linkage as the original
+  OriginalFunction->getParent()->getFunctionList().insert(
+      OriginalFunction->getIterator(), Decl);
+
+  // Nothing is ever added to ParamAttrs before it is applied at the index
+  // one past the fixed parameters. Might have a shorthand.
+  AttrBuilder ParamAttrs(Ctx);
+  Decl->setAttributes(Decl->getAttributes().addParamAttributes(
+      Ctx, FTy->getNumParams(), ParamAttrs));
+
+  OriginalFunction->replaceAllUsesWith(Decl);
+  return Decl;
+}
+
+// Creates the fixed arity counterpart of OriginalFunction, named
+// "<original>.valist" and inserted immediately before it in the module. Its
+// type is the original's with the ... replaced by a trailing va_list
+// parameter. Attributes and metadata are taken from the original, and if the
+// original is a definition its body is spliced into the new function, which
+// leaves the original as a declaration. Returns the new function.
+Function *
+ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+                                             Function *OriginalFunction) {
+  Function &F = *OriginalFunction;
+  // The purpose here is split the variadic function F into two functions
+  // One is a variadic function that bundles the passed argument into a va_list
+  // and passes it to the second function. The second function does whatever
+  // the original F does, except that it takes a va_list instead of the ...
+
+  assert(expansionApplicableToFunction(M, &F));
+
+  auto &Ctx = M.getContext();
+
+  // Returned value isDeclaration() is equal to F.isDeclaration()
+  // but that invariant is not satisfied throughout this function
+  const bool FunctionIsDefinition = !F.isDeclaration();
+
+  // inlinableVariadicFunctionType appends the va_list parameter itself, so
+  // no separate parameter list is built here
+  FunctionType *FTy = F.getFunctionType();
+  FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
+  Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+
+  // Note - same attribute handling as DeadArgumentElimination
+  NF->copyAttributesFrom(&F);
+  // NF->setComdat(F.getComdat()); // beware weak
+  F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+  NF->setName(F.getName() + ".valist");
+  NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+  // Nothing is added to ParamAttrs before it is applied to the trailing
+  // va_list parameter
+  AttrBuilder ParamAttrs(Ctx);
+
+  AttributeList Attrs = NF->getAttributes();
+  Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
+  NF->setAttributes(Attrs);
+
+  // Splice the implementation into the new function with minimal changes
+  if (FunctionIsDefinition) {
+    NF->splice(NF->begin(), &F);
+
+    auto NewArg = NF->arg_begin();
+    for (Argument &Arg : F.args()) {
+      Arg.replaceAllUsesWith(NewArg);
+      NewArg->setName(Arg.getName()); // takeName without killing the old one
+      ++NewArg;
+    }
+    // After the loop NewArg is the one parameter F does not have
+    NewArg->setName("varargs");
+  }
+
+  // Move all metadata across from the original
+  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+  F.getAllMetadata(MDs);
+  for (auto [KindID, Node] : MDs)
+    NF->addMetadata(KindID, *Node);
+  F.clearMetadata();
+
+  return NF;
+}
+
+// Gives the declaration VariadicWrapper a single-block body that allocates a
+// va_list, initialises it with va_start, calls FixedArityReplacement with
+// the wrapper's own arguments followed by the va_list, then runs va_end and
+// returns the call's result (if any). Returns VariadicWrapper.
+Function *
+ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+                                       Function *VariadicWrapper,
+                                       Function *FixedArityReplacement) {
+  auto &Ctx = Builder.getContext();
+  const DataLayout &DL = M.getDataLayout();
+  assert(VariadicWrapper->isDeclaration());
+  Function &F = *VariadicWrapper;
+
+  Type *VaListTy = ABI->vaListType(Ctx);
+
+  auto *BB = BasicBlock::Create(Ctx, "entry", &F);
+  Builder.SetInsertPoint(BB);
+
+  AllocaInst *VaListInstance =
+      Builder.CreateAlloca(VaListTy, nullptr, "va_list");
+
+  Builder.CreateLifetimeStart(VaListInstance,
+                              sizeOfAlloca(Ctx, DL, VaListInstance));
+
+  Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
+                          {VaListInstance});
+
+  SmallVector<Value *> Args;
+  for (Argument &A : F.args())
+    Args.push_back(&A);
+
+  Args.push_back(VaListInstance);
+
+  // Deliberately not marked `tail`. The callee is handed a pointer to the
+  // va_list alloca in this frame, and va_end plus lifetime.end still follow
+  // the call, whereas the tail marker asserts that the callee does not
+  // access the caller's allocas or va_args.
+  CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);
+
+  Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
+                          {VaListInstance});
+  Builder.CreateLifetimeEnd(VaListInstance,
+                            sizeOfAlloca(Ctx, DL, VaListInstance));
+
+  if (Result->getType()->isVoidTy())
+    Builder.CreateRetVoid();
+  else
+    Builder.CreateRet(Result);
+
+  return VariadicWrapper;
+}
+
+bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
+ FunctionType *VarargFunctionType,
+ Function *NF) {
+ bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
+
+ if (!callinstRewritable(CB)) {
+ if (rewriteABI()) {
+ report_fatal_error("Cannot lower callbase instruction");
+ }
+ return Changed;
+ }
+
+ // This is tricky. The call instruction's function type might not match
+ // the type of the caller. When optimising, can leave it unchanged.
+ // Webassembly detects that inconsistency and repairs it.
+ FunctionType *FuncType = CB->getFunctionType();
+ if (FuncType != VarargFunctionType) {
+ if (!rewriteABI()) {
+ return Changed;
+ }
+ FuncType = VarargFunctionType;
+ }
+
+ auto &Ctx = CB->getContext();
+
+ Align MaxFieldAlign(1);
+
+ // The strategy is to allocate a call frame containing the variadic
+ // arguments laid out such that a target specific va_list can be initialized
+ // with it, such that target specific va_arg instructions will correctly
+ // iterate over it. This means getting the alignment right and sometimes
+ // embedding a pointer to the value instead of embedding the value itself.
+
+ Function *CBF = CB->getParent()->getParent();
+
+ ExpandedCallFrame Frame;
+
+ uint64_t CurrentOffset = 0;
+
+ for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
+ Value *ArgVal = CB->getArgOperand(I);
+ bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
+
+ // The call argument is either passed by value, or is a pointer passed byval
+ // The varargs frame either stores the value directly or a pointer to it
+
+ // The type of the value being passed, decoded from byval metadata if
+ // required
+ Type *const UnderlyingType =
+ IsByVal ? CB->getParamByValType(I) : ArgVal->getType();
+ const uint64_t UnderlyingSize =
+ DL.getTypeAllocSize(UnderlyingType).getFixedValue();
+
+ // The type to be written into the call frame
+ Type *FrameFieldType = UnderlyingType;
+
+ // The value to copy from when initialising the frame alloca
+ Value *SourceValue = ArgVal;
+
+ // TODO, slotInfo should probably return the right alignment even
+ // when returning true for indirect, somewhat messy
+ VariadicABIInfo::VAArgSlotInfo slotInfo = ABI->slotInfo(DL, UnderlyingType);
+
+#if 0
+ {
+ fprintf(stdout, "Underlying type for param %u (byval %u, indir %u)\n", I,
+ IsByVal, slotInfo.Indirect);
+ UnderlyingType->dump();
+ }
+#endif
+
+ if (slotInfo.Unknown) {
+ if (rewriteABI()) {
+ report_fatal_error("Variadic lowering unimplemented on given type");
+ } else {
+ return Changed;
+ }
+ }
+
+ if (slotInfo.Indirect) {
+ // The va_arg lowering loads through a pointer. Set up an alloca to aim
+ // that pointer at.
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+ Value *CallerCopy =
+ Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
+
+ Builder.SetInsertPoint(CB);
+ if (IsByVal)
+ Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
+ else
+ Builder.CreateStore(ArgVal, CallerCopy);
+
+ // Indirection now handled, pass the alloca ptr by value
+ FrameFieldType = DL.getAllocaPtrType(Ctx);
+ SourceValue = CallerCopy;
+ }
+
+ // Alignment of the value within the frame
+ // This probably needs to be controllable as a function of type
+ Align DataAlign = slotInfo.Align;
+
+ MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
+
+ uint64_t DataAlignV = DataAlign.value();
+ if (uint64_t Rem = CurrentOffset % DataAlignV) {
+ // Inject explicit padding to deal with alignment requirements
+ uint64_t Padding = DataAlignV - Rem;
+ Frame.padding(Ctx, Padding);
+ CurrentOffset += Padding;
+ }
+
+ if (slotInfo.Indirect) {
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ } else {
+ if (IsByVal) {
+ Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
+ } else {
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ }
+ }
+
+ CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
+ }
+
+ if (Frame.empty()) {
+ // Not passing any arguments, hopefully va_arg won't try to read any
+ // Creating a single byte frame containing nothing to point the va_list
+ // instance as that is less special-casey in the compiler and probably
+ // easier to interpret in a debugger.
+ Frame.padding(Ctx, 1);
+ }
+
+ StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
+
+ // The struct instance needs to be at least MaxFieldAlign for the alignment of
+ // the fields to be correct at runtime. Use the native stack alignment instead
+ // if that's greater as that tends to give better codegen.
+ // This is an awkward way to guess whether there is a known stack alignment
+ // without hitting an assert in DL.getStackAlignment, 1024 is an arbitrary
+ // number likely to be greater than the natural stack alignment.
+ // TODO: DL.getStackAlignment could return a MaybeAlign instead of assert
+ Align AllocaAlign = MaxFieldAlign;
+ if (DL.exceedsNaturalStackAlignment(Align(1024))) {
+ AllocaAlign = std::max(AllocaAlign, DL.getStackAlignment());
+ }
+
+ // Put the alloca to hold the variadic args in the entry basic block.
+ Builder.SetInsertPointPastAllocas(CBF);
+
+ // SetCurrentDebugLocation when the builder SetInsertPoint method does not
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+
+ // The awkward construction here is to set the alignment on the instance
+ Changed = true;
+ AllocaInst *Alloced = Builder.Insert(
+ new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
+ "vararg_buffer");
+ assert(Alloced->getAllocatedType() == VarargsTy);
+
+ // Initialize the fields in the struct
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+ Frame.initializeStructAlloca(DL, Builder, Alloced);
+
+ const unsigned NumArgs = FuncType->getNumParams();
+ SmallVector<Value *> Args;
+ Args.assign(CB->arg_begin(), CB->arg_begin() + NumArgs);
+
+ // Initialize a va_list pointing to that struct and pass it as the last
+ // argument
+ AllocaInst *VaList = nullptr;
+ {
+ if (!ABI->vaListPassedInSSARegister()) {
+ Type *VaListTy = ABI->vaListType(Ctx);
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+ VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_list");
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
+ }
+ Args.push_back(ABI->initializeVAList(Ctx, Builder, VaList, Alloced));
+ }
+
+ // Attributes excluding any on the vararg arguments
+ AttributeList PAL = CB->getAttributes();
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL =
+ AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
+ }
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CB->getOperandBundlesAsDefs(OpBundles);
+
+ CallBase *NewCB = nullptr;
+
+ // Assert won't be true once InvokeInst is implemented in a later patch,
+ // current invariant is established by callinstRewritable() at the start
+ assert(isa<CallInst>(CB));
+
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+
+ Value *Dst = NF ? NF : CI->getCalledOperand();
+ FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
+
+ NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI);
+
+ CallInst::TailCallKind TCK = CI->getTailCallKind();
+ assert(TCK != CallInst::TCK_MustTail); // guarded at prologue
+
+ // It doesn't get to be a tail call any more
+ // might want to guard this with arch, x64 and aarch64 document that
+ // varargs can't be tail called anyway
+ // Not totally convinced this is necessary but dead store elimination
+ // will discard the stores to the Alloca and pass uninitialized memory along
+ // instead when the function is marked tailcall
+ if (TCK == CallInst::TCK_Tail) {
+ TCK = CallInst::TCK_None;
+ }
+ CI->setTailCallKind(TCK);
+
+ } else {
+ llvm_unreachable("unreachable because callinstRewritable() returned false");
+ }
+
+ if (VaList)
+ Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
+
+ Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+
+ NewCB->setAttributes(PAL);
+ NewCB->takeName(CB);
+ NewCB->setCallingConv(CB->getCallingConv());
+
+ NewCB->setDebugLoc(DebugLoc());
+
+ // DeadArgElim and ArgPromotion copy exactly this metadata
+ NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+
+ CB->replaceAllUsesWith(NewCB);
+ CB->eraseFromParent();
+ return Changed;
+}
+
+// Lower a va_start call found in a non-variadic function whose last argument
+// carries the va_list. Returns true when Inst was replaced and erased, false
+// when the containing function is still variadic and Inst was left alone.
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VAStartInst *Inst) {
+ // TODO: Document or remove this action at a distance trickery
+ // A va_start inside a function that is still variadic is left untouched
+ Function *ContainingFunction = Inst->getFunction();
+ if (ContainingFunction->isVarArg())
+ return false;
+
+ // The last argument is a vaListParameterType
+ Argument *PassedVaList =
+ ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
+
+ // va_start takes a pointer to a va_list, e.g. one on the stack
+ Value *VaStartArg = Inst->getArgList();
+
+ // New instructions go immediately before the va_start being replaced
+ Builder.SetInsertPoint(Inst);
+
+ // If the va_list is itself a ptr, emitting a vacopy call requires an alloca
+ // which is then removed, simpler to build the store directly.
+ if (ABI->vaListPassedInSSARegister()) {
+ Builder.CreateStore(PassedVaList, VaStartArg);
+ } else {
+ // Otherwise emit a vacopy to pick up target-specific handling if any
+ auto &Ctx = Builder.getContext();
+ Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
+ {VaStartArg, PassedVaList});
+ }
+
+ Inst->eraseFromParent();
+ return true;
+}
+
+// Lower a va_end call by deleting it. Asserts that the target ABI reports
+// va_end as a no-op. The builder and data layout are unused. Always returns
+// true.
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst) {
+ assert(ABI->vaEndIsNop());
+ // A no-op on all the architectures implemented so far
+ Inst->eraseFromParent();
+ return true;
+}
+
+// Lower a va_copy call: memcpyVAListPointers emits the copy from the source
+// va_list to the destination immediately before Inst, then Inst is erased.
+// Asserts that the target ABI reports va_copy as a memcpy. Always returns true.
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VACopyInst *Inst) {
+ // TODO: This looks to be wrong for non-struct va_list, check it using wasm
+ assert(ABI->vaCopyIsMemcpy());
+ Builder.SetInsertPoint(Inst);
+ memcpyVAListPointers(DL, Builder, Inst->getDest(), Inst->getSrc());
+ Inst->eraseFromParent();
+ return true;
+}
+
+// Clamp A into [MinAlign, MaxAlign]. A bound of zero switches that side of
+// the clamp off. The lower bound is applied before the upper bound.
+template <uint32_t MinAlign, uint32_t MaxAlign> Align clampAlign(Align A) {
+  // Only construct Align(MinAlign) when the bound is active and violated
+  const bool BelowMin = MinAlign != 0 && A < MinAlign;
+  const Align Raised = BelowMin ? Align(MinAlign) : A;
+
+  // Likewise for the upper bound, checked against the already raised value
+  const bool AboveMax = MaxAlign != 0 && Raised > MaxAlign;
+  return AboveMax ? Align(MaxAlign) : Raised;
+}
+
+// Returns true for the small set of scalar types the x64 and aarch64 linux
+// lowering currently handles: double, i32, i64 and pointers.
+bool simpleScalarType(Type *Parameter) {
+  // This is a stop-gap. The MVP can optimise x64 and aarch64 on linux
+  // for sufficiently simple calls.
+  return Parameter->isDoubleTy() || Parameter->isIntegerTy(32) ||
+         Parameter->isIntegerTy(64) || Parameter->isPointerTy();
+}
+
+// VariadicABIInfo implementation for the aarch64 triple. The va_list is a five
+// field struct held in memory and passed to the callee by pointer.
+struct AArch64 final : public VariadicABIInfo {
+ // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
+ // big endian, little endian ILP32 have their own triples
+
+ bool vaListPassedInSSARegister() override { return false; }
+
+ // IR struct type mirroring the C va_list layout shown in the #if 0 block
+ Type *vaListType(LLVMContext &Ctx) override {
+#if 0
+ typedef struct va_list {
+ void * stack; // next stack param
+ void * gr_top; // end of GP arg reg save area
+ void * vr_top; // end of FP/SIMD arg reg save area
+ int gr_offs; // offset from gr_top to next GP register arg
+ int vr_offs; // offset from vr_top to next FP/SIMD register arg
+ } va_list;
+#endif
+
+ auto I32 = Type::getInt32Ty(Ctx);
+ auto Ptr = PointerType::getUnqual(Ctx);
+
+ return StructType::get(Ctx, {Ptr, Ptr, Ptr, I32, I32});
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ // Emits stores, at the builder's current insert point, that fill in every
+ // field of the va_list alloca VaList so that it refers to VoidBuffer.
+ // Returns VaList, the value to pass to the callee.
+ Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst *VaList, Value *VoidBuffer) override {
+ assert(VaList->getAllocatedType() == vaListType(Ctx));
+
+ Type *VaListTy = vaListType(Ctx);
+ Type *I32 = Type::getInt32Ty(Ctx);
+ Constant *Zero = ConstantInt::get(I32, 0);
+ Constant *Null = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
+
+ // Idxs[0] selects the struct itself, Idxs[1] is rewritten per field below
+ Value *Idxs[2] = {
+ ConstantInt::get(I32, 0),
+ nullptr,
+ };
+
+ Idxs[1] = ConstantInt::get(I32, 0);
+ Builder.CreateStore(
+ VoidBuffer, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "stack"));
+
+ // The general and vector regions are unused, given by the zero offsets,
+ // with nullptr a reasonable value to use for the pointer fields. That is,
+ // all arguments are packed into the "stack" area, leaving the two
+ // specialised areas unused.
+
+ Idxs[1] = ConstantInt::get(I32, 1);
+ Builder.CreateStore(
+ Null, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gr_top"));
+
+ Idxs[1] = ConstantInt::get(I32, 2);
+ Builder.CreateStore(
+ Null, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "vr_top"));
+
+ Idxs[1] = ConstantInt::get(I32, 3);
+ Builder.CreateStore(
+ Zero, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gr_offs"));
+
+ Idxs[1] = ConstantInt::get(I32, 4);
+ Builder.CreateStore(
+ Zero, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "vr_offs"));
+
+ return VaList;
+ }
+
+ // Slot alignment is the ABI type alignment raised to at least eight bytes,
+ // with no upper bound. Nothing is passed indirectly yet, and any type that
+ // simpleScalarType rejects is reported as Unknown.
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ Align A = clampAlign<8, 0u>(DL.getABITypeAlign(Parameter));
+
+ bool Indirect = false; // true for some non-simple types on aarch64
+ bool Unknown = !simpleScalarType(Parameter);
+ return {A, Indirect, Unknown};
+ }
+};
+
+// VariadicABIInfo implementation for wasm32. The va_list is a single pointer
+// which is passed to the callee directly as an SSA value.
+struct Wasm final : public VariadicABIInfo {
+  bool vaListPassedInSSARegister() override { return true; }
+
+  Type *vaListType(LLVMContext &Ctx) override {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  Type *vaListParameterType(Module &M) override {
+    return PointerType::getUnqual(M.getContext());
+  }
+
+  // There is no separate va_list object to fill in, the pointer to the frame
+  // is the va_list, so nothing is emitted and the buffer is handed back.
+  Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                          AllocaInst * /*va_list*/, Value *buffer) override {
+    return buffer;
+  }
+
+  VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+    // TODO, test empty record
+    // Structs with more than one element are passed indirectly: the slot
+    // holds a pointer and takes the alignment of a pointer.
+    auto *Record = dyn_cast<StructType>(Parameter);
+    if (Record && Record->getNumElements() > 1) {
+      Type *PtrTy = PointerType::getUnqual(Parameter->getContext());
+      return {DL.getABITypeAlign(PtrTy), true, false};
+    }
+
+    // Everything else is stored inline with at least four byte alignment
+    return {clampAlign<4, 0>(DL.getABITypeAlign(Parameter)), false, false};
+  }
+};
+
+// VariadicABIInfo implementation selected for x86_64 linux. The va_list is a
+// one element array of a four field struct, held in memory and passed to the
+// callee by pointer.
+struct X64SystemV final : public VariadicABIInfo {
+ bool vaListPassedInSSARegister() override { return false; }
+
+ // [1 x { i32, i32, ptr, ptr }], fields named below as gp_offset, fp_offset,
+ // overflow area pointer and reg_save_area
+ Type *vaListType(LLVMContext &Ctx) override {
+ auto I32 = Type::getInt32Ty(Ctx);
+ auto Ptr = PointerType::getUnqual(Ctx);
+ return ArrayType::get(StructType::get(Ctx, {I32, I32, Ptr, Ptr}), 1);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ // Emits stores, at the builder's current insert point, that fill in every
+ // field of the va_list alloca VaList so that it refers to VoidBuffer.
+ // Returns VaList, the value to pass to the callee.
+ Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst *VaList, Value *VoidBuffer) override {
+ assert(VaList->getAllocatedType() == vaListType(Ctx));
+
+ Type *VaListTy = vaListType(Ctx);
+
+ Type *I32 = Type::getInt32Ty(Ctx);
+ Type *I64 = Type::getInt64Ty(Ctx);
+
+ // Idxs[0] steps through the pointer, Idxs[1] picks array element zero and
+ // Idxs[2] is rewritten per struct field below
+ Value *Idxs[3] = {
+ ConstantInt::get(I64, 0),
+ ConstantInt::get(I32, 0),
+ nullptr,
+ };
+
+ // The magic numbers here set up a va_list instance that has the general
+ // purpose and floating point regions empty, such that only the overflow
+ // area is used. That means a single contiguous struct can be the backing
+ // store and simpler code to optimise in the inlining case.
+
+ // NOTE(review): 48 presumably equals the 6 * 8 term used for fp_offset
+ // below, i.e. the end of the general purpose region - confirm
+ Idxs[2] = ConstantInt::get(I32, 0);
+ Builder.CreateStore(
+ ConstantInt::get(I32, 48),
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gp_offset"));
+
+ Idxs[2] = ConstantInt::get(I32, 1);
+ Builder.CreateStore(
+ ConstantInt::get(I32, 6 * 8 + 8 * 16),
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "fp_offset"));
+
+ // NOTE(review): the value name below is spelled "overfow", presumably a
+ // typo for "overflow". It only names an IR value, but tests may match on
+ // it, so rename it together with any CHECK lines that mention it.
+ Idxs[2] = ConstantInt::get(I32, 2);
+ Builder.CreateStore(
+ VoidBuffer,
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "overfow_arg_area"));
+
+ Idxs[2] = ConstantInt::get(I32, 3);
+ Builder.CreateStore(
+ ConstantPointerNull::get(PointerType::getUnqual(Ctx)),
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "reg_save_area"));
+
+ return VaList;
+ }
+
+ // Slot alignment is the ABI type alignment raised to at least eight bytes,
+ // with no upper bound. Nothing is passed indirectly, and any type that
+ // simpleScalarType rejects is reported as Unknown.
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ // TODO: Make this comment look less scary
+ // SystemV X64 documented behaviour:
+ // Slots are at least eight byte aligned and at most 16 byte aligned.
+ // If the type needs more than sixteen byte alignment, it still only gets
+ // that much alignment on the stack.
+ // X64 behaviour in clang:
+ // Slots are at least eight byte aligned and at most naturally aligned
+ // This matches clang, not the ABI docs.
+
+ Align A = clampAlign<8, 0u>(DL.getABITypeAlign(Parameter));
+ bool Indirect = false;
+ bool Unknown = !simpleScalarType(Parameter);
+ return {A, Indirect, Unknown};
+ }
+};
+
+// Returns the ABI description for the given target triple, or an empty
+// pointer when the target is not one of aarch64, wasm32 or x86_64 linux.
+std::unique_ptr<VariadicABIInfo>
+VariadicABIInfo::create(llvm::Triple const &Triple) {
+  const auto Arch = Triple.getArch();
+
+  if (Arch == Triple::aarch64)
+    return std::make_unique<AArch64>();
+
+  if (Arch == Triple::wasm32)
+    return std::make_unique<Wasm>();
+
+  // x86_64 is only handled when the operating system is linux
+  if (Arch == Triple::x86_64 && Triple.isOSLinux())
+    return std::make_unique<X64SystemV>();
+
+  return {};
+}
+
+} // namespace
+
+char ExpandVariadics::ID = 0;
+
+INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
+ false)
+
+// Allocates a new ExpandVariadics module pass configured with mode M and
+// returns it as a raw pointer.
+ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
+ return new ExpandVariadics(M);
+}
+
+// Runs a temporary ExpandVariadics instance, built from the stored mode, over
+// M. Reports no analyses preserved if the module changed, all otherwise.
+PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
+  if (ExpandVariadics(Mode).runOnModule(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+// Picks the mode from the optimisation level: Disable at O0, Optimize at
+// every other level, then delegates to the mode-taking constructor.
+ExpandVariadicsPass::ExpandVariadicsPass(OptimizationLevel Level)
+ : ExpandVariadicsPass(Level == OptimizationLevel::O0
+ ? ExpandVariadicsMode::Disable
+ : ExpandVariadicsMode::Optimize) {}
+
+// Constructs the pass with an explicitly chosen mode.
+ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}
diff --git a/llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll b/llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll
new file mode 100644
index 0000000000000..ceb226affc623
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+@vararg_ptr = global ptr @vararg, align 8
+
+; va_copy from a va_list parameter: the intrinsic is expected to become a
+; 32 byte llvm.memcpy.inline, with the rest of the function unchanged.
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %cp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %cp, ptr %va, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %cp, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %cp = alloca %struct.__va_list, align 8
+ %byval-temp = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr %va)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %cp, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+; Variadic function with one va_start/va_end pair: the signature is expected
+; to take a single ptr %varargs, the va_start to become a 32 byte
+; llvm.memcpy.inline from %varargs, and the va_end to be removed.
+define void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s, ptr %varargs, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca %struct.__va_list, align 8
+ %byval-temp = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+; Variadic function with two va_start/va_end pairs on separate va_list
+; objects: both va_start calls are expected to become llvm.memcpy.inline
+; copies from the same %varargs parameter, and both va_end calls to be removed.
+define void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %s1 = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp1 = alloca %struct.__va_list, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s0, ptr %varargs, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s0, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s1, ptr %varargs, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp1)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp1, ptr noundef nonnull align 8 dereferenceable(32) %s1, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca %struct.__va_list, align 8
+ %s1 = alloca %struct.__va_list, align 8
+ %byval-temp = alloca %struct.__va_list, align 8
+ %byval-temp1 = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s0, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp1)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp1, ptr noundef nonnull align 8 dereferenceable(32) %s1, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp1)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s0)
+ ret void
+}
+
+; Tail call passing one i32 through "...": the argument is expected to be
+; stored into a <{ i32 }> frame alloca, a va_list alloca to be initialised
+; with its stack field pointing at that frame, and the call to pass the
+; va_list and lose its tail marker.
+define void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare void @vararg(...)
+
+; Tail call passing one double through "...": the argument is expected to be
+; stored into a <{ double }> frame alloca of 8 bytes, with the va_list set up
+; to point at it.
+define void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+; Tail call passing an i32 then a double through "...": the frame is expected
+; to be <{ i32, [4 x i8], double }>, i.e. four bytes of explicit padding are
+; inserted so the double lands in struct field 2.
+define void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+; Tail call passing a double then an i32 through "...": the frame is expected
+; to be <{ double, i32 }> with no padding, 12 bytes in total.
+define void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+; Indirect variadic call through a function pointer loaded from @vararg_ptr:
+; the call is expected to be rewritten like the direct case, passing the
+; va_list to the loaded pointer %0.
+define void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void %0(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 8
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+
diff --git a/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
new file mode 100644
index 0000000000000..93c4614782fd8
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
@@ -0,0 +1,483 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; Check that variadic arguments are lowered to the locations this target expects
+
+; The packed struct types below show the layout of each call frame, including explicit padding
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_libcS.vararg = type <{ i32, ptr }>
+; CHECK: %libcS_i32.vararg = type <{ ptr, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+; CHECK: %fptr_libcS.vararg = type <{ ptr }>
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+@vararg_ptr = hidden global ptr @vararg, align 4 ; holds the address of @vararg; the fptr_* tests load it volatilely to make an indirect variadic call
+
+define hidden void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va.addr = alloca ptr, align 4
+; CHECK-NEXT: %cp = alloca ptr, align 4
+; CHECK-NEXT: store ptr %va, ptr %va.addr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %cp, ptr %va.addr, i32 4, i1 false)
+; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry: ; Non-variadic function whose only variadic construct is va_copy; its signature is expected to stay unchanged.
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr) ; expected to become a 4-byte llvm.memcpy.inline from %va.addr to %cp
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define hidden void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: store ptr %varargs, ptr %s, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry: ; Variadic definition with one va_start/va_end pair; the expected output replaces the (...) signature with a single ptr %varargs parameter.
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s) ; expected to become: store ptr %varargs, ptr %s
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s) ; no corresponding instruction appears in the expected output
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define hidden void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca ptr, align 4
+; CHECK-NEXT: %s1 = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: store ptr %varargs, ptr %s0, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s0, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: store ptr %varargs, ptr %s1, align 4
+; CHECK-NEXT: %1 = load ptr, ptr %s1, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry: ; Variadic definition that runs va_start twice on separate va_lists; each one is expected to receive the same %varargs pointer.
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0) ; expected to become: store ptr %varargs, ptr %s0
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0) ; no corresponding instruction appears in the expected output
+ call void @llvm.va_start.p0(ptr nonnull %s1) ; expected to become: store ptr %varargs, ptr %s1
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1) ; no corresponding instruction appears in the expected output
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define hidden void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; One i32 passed variadically; expected: a 4-byte buffer (alloca align 16) holding %x at field 0.
+ tail call void (...) @vararg(i32 noundef %x) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+declare void @vararg(...)
+
+define hidden void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; One double passed variadically; expected: an 8-byte buffer (alloca align 16) with %x stored at align 8.
+ tail call void (...) @vararg(double noundef %x) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @single_v4f32(<4 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; One <4 x float> passed variadically; expected: a 16-byte buffer, alloca and store both align 16.
+ tail call void (...) @vararg(<4 x float> noundef %x) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @single_v8f32(<8 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; One <8 x float> passed variadically; expected: a 32-byte buffer whose alloca alignment rises to 32 to match the vector store.
+ tail call void (...) @vararg(<8 x float> noundef %x) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @single_v16f32(<16 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; One <16 x float> passed variadically; expected: a 64-byte buffer, alloca and store both align 64.
+ tail call void (...) @vararg(<16 x float> noundef %x) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @single_v32f32(<32 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; One <32 x float> passed variadically; expected: a 128-byte buffer, alloca and store both align 128.
+ tail call void (...) @vararg(<32 x float> noundef %x) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; i32 followed by double; expected: %x at field 0 and %y at field 2 (field 1 is skipped, i.e. padding), 16-byte buffer.
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; double followed by i32; expected: adjacent fields 0 and 1 with no padding between them, 12-byte buffer.
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %y, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; i32 followed by a byval %struct.libcS; expected: the struct is copied (24-byte memcpy) into %IndirectAlloca and only its address is stored in the 8-byte buffer.
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; byval %struct.libcS followed by i32; expected: the struct is copied (24-byte memcpy) into %IndirectAlloca, its address goes in field 0 and %y in field 1.
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; i32 followed by <4 x float>; expected: %x at field 0, vector at field 2 (field 1 is padding), 32-byte buffer.
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; <4 x float> followed by i32; expected: adjacent fields 0 and 1 with no padding, 20-byte buffer.
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; i32 followed by <8 x float>; expected: %x at field 0, vector at field 2 (field 1 is padding), 64-byte buffer with alloca align 32.
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; <8 x float> followed by i32; expected: adjacent fields 0 and 1 with no padding, 36-byte buffer with alloca align 32.
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; i32 followed by <16 x float>; expected: %x at field 0, vector at field 2 (field 1 is padding), 128-byte buffer with alloca align 64.
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; <16 x float> followed by i32; expected: adjacent fields 0 and 1 with no padding, 68-byte buffer with alloca align 64.
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; i32 followed by <32 x float>; expected: %x at field 0, vector at field 2 (field 1 is padding), 256-byte buffer with alloca align 128.
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; <32 x float> followed by i32; expected: adjacent fields 0 and 1 with no padding, 132-byte buffer with alloca align 128.
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y) ; expected to become a call passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; Indirect variadic call passing one i32; expected: same 4-byte buffer as a direct call, with the buffer pointer handed to the loaded callee.
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4 ; the callee is only known through this load, so the call below is indirect
+ tail call void (...) %0(i32 noundef %x) ; expected to become a call of %0 passing ptr %vararg_buffer
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry: ; Indirect variadic call passing a byval %struct.libcS; expected: the struct is copied (24-byte memcpy) into %IndirectAlloca and its address is the buffer's only field.
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4 ; the callee is only known through this load, so the call below is indirect
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x) ; expected to become a call of %0 passing ptr %vararg_buffer
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/vararg-frame.ll b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
new file mode 100644
index 0000000000000..03aeb32de2261
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
@@ -0,0 +1,525 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; pass_s0: variadic call with no arguments; the expected code passes the constant 0 to sink and never adjusts __stack_pointer.
+define void @pass_s0() {
+; CHECK-LABEL: pass_s0:
+; CHECK: .functype pass_s0 () -> ()
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: call sink
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+declare void @sink(...)
+
+; pass_s1: a single i8 variadic argument; it arrives as a wasm i32 and is expected to be stored with i32.store at offset 0 of a 16-byte frame.
+define void @pass_s1(i8 %x) {
+; CHECK-LABEL: pass_s1:
+; CHECK: .functype pass_s1 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x)
+ ret void
+}
+
+; pass_s2: a single i16 variadic argument; it arrives as a wasm i32 and is expected to be stored with i32.store at offset 0 of a 16-byte frame.
+define void @pass_s2(i16 %x) {
+; CHECK-LABEL: pass_s2:
+; CHECK: .functype pass_s2 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i16 %x)
+ ret void
+}
+
+; pass_s3: a single i32 variadic argument; expected to be stored with i32.store at offset 0 of a 16-byte frame whose address is passed to sink.
+define void @pass_s3(i32 %x) {
+; CHECK-LABEL: pass_s3:
+; CHECK: .functype pass_s3 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 %x)
+ ret void
+}
+
+; pass_s4: a single i64 variadic argument; expected to be stored with i64.store at offset 0 of a 16-byte frame whose address is passed to sink.
+define void @pass_s4(i64 %x) {
+; CHECK-LABEL: pass_s4:
+; CHECK: .functype pass_s4 (i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i64 %x)
+ ret void
+}
+
+; pass_s5: a single <4 x i32> variadic argument; it arrives as four i32 values, expected to be stored at offsets 12, 8, 4 and 0 of a 16-byte frame.
+define void @pass_s5(<4 x i32> noundef %x) {
+; CHECK-LABEL: pass_s5:
+; CHECK: .functype pass_s5 (i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x)
+ ret void
+}
+
+; pass_int_s0: baseline for the pass_int_* group, passing only the leading i32; expected to be stored at offset 0 of a 16-byte frame.
+define void @pass_int_s0(i32 noundef %i) {
+; CHECK-LABEL: pass_int_s0:
+; CHECK: .functype pass_int_s0 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i)
+ ret void
+}
+
+; pass_int_s1: i32 followed by i8; expected: %i at offset 0 and %x (as an i32) at offset 4 of a 16-byte frame.
+define void @pass_int_s1(i32 noundef %i, i8 %x) {
+; CHECK-LABEL: pass_int_s1:
+; CHECK: .functype pass_int_s1 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i8 %x)
+ ret void
+}
+
+; pass_int_s2: i32 followed by i16; expected: %i at offset 0 and %x (as an i32) at offset 4 of a 16-byte frame.
+define void @pass_int_s2(i32 noundef %i, i16 %x) {
+; CHECK-LABEL: pass_int_s2:
+; CHECK: .functype pass_int_s2 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x)
+ ret void
+}
+
+; pass_int_s3: i32 followed by i32; expected: %i at offset 0 and %x at offset 4 of a 16-byte frame.
+define void @pass_int_s3(i32 noundef %i, i32 %x) {
+; CHECK-LABEL: pass_int_s3:
+; CHECK: .functype pass_int_s3 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i32 %x)
+ ret void
+}
+
+; pass_int_s4: i32 followed by i64; expected: %i at offset 0 and %x at offset 8 (offsets 4..7 unused) of a 16-byte frame.
+define void @pass_int_s4(i32 noundef %i, i64 %x) {
+; CHECK-LABEL: pass_int_s4:
+; CHECK: .functype pass_int_s4 (i32, i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i64 %x)
+ ret void
+}
+
+; pass_int_s5: i32 followed by <4 x i32>; expected: %i at offset 0 and the four lanes at offsets 16, 20, 24 and 28 of a 32-byte frame.
+define void @pass_int_s5(i32 noundef %i, <4 x i32> noundef %x) {
+; CHECK-LABEL: pass_int_s5:
+; CHECK: .functype pass_int_s5 (i32, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, <4 x i32> noundef %x)
+ ret void
+}
+
+; pass_asc: arguments in ascending size order; expected 48-byte frame with i8 at 0, i16 at 4, i32 at 8, i64 at 16 and the vector lanes at 32, 36, 40 and 44.
+define void @pass_asc(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_asc:
+; CHECK: .functype pass_asc (i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 44
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 40
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 36
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> noundef %x5)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_dsc(<4 x i32> noundef %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) {
+; CHECK-LABEL: pass_dsc:
+; CHECK: .functype pass_dsc (i32, i32, i32, i32, i64, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_multiple(i32 noundef %i, i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_multiple:
+; CHECK: .functype pass_multiple (i32, i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 40
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 36
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 32
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x2, i64 %x4)
+ tail call void (...) @sink(i32 noundef %i, i8 %x1, i32 %x3, <4 x i32> noundef %x5)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
new file mode 100644
index 0000000000000..ed45631025e50
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+
+%struct.__va_list_tag = type { i32, i32, ptr, ptr }
+
+@vararg_ptr = global ptr @vararg, align 8
+
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %cp = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %cp, ptr %va, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %cp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %cp = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr %va)
+ call void @valist(ptr noundef nonnull %cp)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s, ptr %varargs, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %s)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ call void @valist(ptr noundef nonnull %s)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: %s1 = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s0, ptr %varargs, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %s0)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s1, ptr %varargs, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca [1 x %struct.__va_list_tag], align 16
+ %s1 = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ call void @valist(ptr noundef nonnull %s0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ call void @valist(ptr noundef nonnull %s1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s0)
+ ret void
+}
+
+define void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare void @vararg(...)
+
+define void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void %0(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 8
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 51fb93daa4dfa..54c593c98ab66 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -125,6 +125,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 064362eabbf83..fa9c0cc7708e6 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -61,6 +61,7 @@
; CHECK-O-NEXT: Running analysis: TypeBasedAA
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 19a44867e434a..8bd372f4c0691 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -50,6 +50,7 @@
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index ac80a31d8fd4b..dd99340e60c0d 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -58,7 +58,7 @@
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
-
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 6486639e07b49..d2a53cea68c37 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -92,6 +92,7 @@
; CHECK-O-NEXT: Running analysis: TypeBasedAA
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index 09f9f0f48badd..cc110b01fb244 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -83,6 +83,7 @@
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 47bdbfd2d357d..d2c547e04085e 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -63,6 +63,7 @@
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
new file mode 100644
index 0000000000000..8a841daef832c
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
@@ -0,0 +1,239 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+
+; Split variadic functions into two functions:
+; - one equivalent to the original, same symbol etc
+; - one implementing the contents of the original but taking a valist
+; IR here is applicable to any target that uses a ptr for valist
+;
+; Defines a function with each linkage (in the order of the llvm documentation).
+; If split applies it does the same transform to each.
+; Whether the split applies depends on whether the ABI is being changed - e.g. a weak
+; function is not normally worth splitting, as its contents cannot be called from elsewhere.
+; If the ABI is being rewritten then the function is still converted. Call sites are tested elsewhere.
+
+; update_test_checks.py doesn't emit checks for declarations
+
+declare void @sink_valist(ptr)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare void @decl_simple(...)
+define void @defn_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for private
+define private void @defn_private_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_private_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_private_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_private_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for internal
+define internal void @defn_internal_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_internal_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_internal_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_internal_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for available_externally
+define available_externally void @available_externally_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@available_externally_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @available_externally_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@available_externally_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for linkonce
+define linkonce void @defn_linkonce_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak
+define weak void @defn_weak_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; common is not applicable to functions
+; appending is not applicable to functions
+
+declare extern_weak void @decl_extern_weak_simple(...)
+; no define for extern_weak
+
+; no declare for linkonce_odr
+define linkonce_odr void @defn_linkonce_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_linkonce_odr_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak_odr
+define weak_odr void @defn_weak_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_odr_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_weak_odr_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+declare external void @decl_external_simple(...)
+define external void @defn_external_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_external_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_external_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_external_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+
+
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
new file mode 100644
index 0000000000000..7a40e1290a8ba
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+
+
+; The examples are variadic functions that are passed an int and a double and return either the first or the second of them
+; Split each function into an internal equivalent that takes a va_list and an ABI-preserving wrapper
+
+define i32 @variadic_int_double_get_firstz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: %0 = tail call i32 @variadic_int_double_get_firstz.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret i32 %0
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: store ptr %argp.next, ptr %va, align 4
+; ABI-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; ABI-NEXT: ret i32 %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ store ptr %argp.next, ptr %va, align 4
+ %0 = load i32, ptr %argp.cur, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret i32 %0
+}
+
+; CHECK-LABEL: define i32 @variadic_int_double_get_firstz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call i32 @variadic_int_double_get_firstz.valist(ptr %va_list)
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal i32 @variadic_int_double_get_firstz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: store ptr %argp.next, ptr %va, align 4
+; CHECK-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+define double @variadic_int_double_get_secondz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: %0 = tail call double @variadic_int_double_get_secondz.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret double %0
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; ABI-NEXT: store ptr %argp.next2, ptr %va, align 4
+; ABI-NEXT: %0 = load double, ptr %argp.next, align 4
+; ABI-NEXT: ret double %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+ store ptr %argp.next2, ptr %va, align 4
+ %0 = load double, ptr %argp.next, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret double %0
+}
+
+; CHECK-LABEL: define double @variadic_int_double_get_secondz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call double @variadic_int_double_get_secondz.valist(ptr %va_list)
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal double @variadic_int_double_get_secondz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; CHECK-NEXT: store ptr %argp.next2, ptr %va, align 4
+; CHECK-NEXT: %0 = load double, ptr %argp.next, align 4
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+
+; CHECK-LABEL: @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = icmp eq i32 %call, %x
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = icmp eq i32 %call, %x
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call i32 @variadic_int_double_get_firstz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = icmp eq i32 %call, %x
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call i32 (...) @variadic_int_double_get_firstz(i32 %x, double %y)
+ %cmp.i = icmp eq i32 %call, %x
+ ret i1 %cmp.i
+}
+
+; CHECK-LABEL: @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = fcmp oeq double %call, %y
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = fcmp oeq double %call, %y
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call double @variadic_int_double_get_secondz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = fcmp oeq double %call, %y
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call double (...) @variadic_int_double_get_secondz(i32 %x, double %y)
+ %cmp.i = fcmp oeq double %call, %y
+ ret i1 %cmp.i
+}
+
+; Declaration unchanged
+; CHECK: declare void @variadic_without_callers(...)
+declare void @variadic_without_callers(...)
+
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_end.p0(ptr)
diff --git a/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
new file mode 100644
index 0000000000000..0310adf936d84
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+
+declare void @vararg(...)
+@vararg_ptr = hidden global ptr @vararg, align 4
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; OPT-LABEL: @fptr_single_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](i32 noundef [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_single_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_SINGLE_I32_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_SINGLE_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; OPT-LABEL: @fptr_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/intrinsics.ll b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
new file mode 100644
index 0000000000000..172a6ecc900e0
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK,OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=CHECK,ABI
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+
+define void @start_once(...) {
+; OPT-LABEL: @start_once(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_LIST:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_LIST]])
+; OPT-NEXT: tail call void @start_once.valist(ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_once(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+
+define void @start_twice(...) {
+; OPT-LABEL: @start_twice(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_LIST:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_LIST]])
+; OPT-NEXT: tail call void @start_twice.valist(ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_twice(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S0:%.*]] = alloca ptr, align 4
+; ABI-NEXT: [[S1:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S0]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S0]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: store ptr [[VARARGS]], ptr [[S1]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S1]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: @copy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VA_ADDR:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: [[CP:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: store ptr [[VA:%.*]], ptr [[VA_ADDR]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr [[CP]], ptr [[VA_ADDR]], i32 4, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CP]], align 4
+; CHECK-NEXT: call void @valist(ptr noundef [[TMP0]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr)
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-byval.ll b/llvm/test/Transforms/ExpandVariadics/pass-byval.ll
new file mode 100644
index 0000000000000..b21f83fd75587
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-byval.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+
+
+; CHECK: @sink
+declare void @sink(...)
+
+
+define void @pass_byval(ptr byval(i32) %b) {
+; OPT-LABEL: @pass_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval(i32) [[B:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP0]], ptr [[B:%.*]], i64 4, i1 false)
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(i32) %b)
+ ret void
+}
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define void @i32_libcS(i32 %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; OPT-LABEL: @i32_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[Y:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, ptr byval(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define void @libcS_i32(ptr byval(%struct.libcS) align 8 %x, i32 %y) {
+; OPT-LABEL: @libcS_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(%struct.libcS) align 8 %x, i32 %y)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-integers.ll b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
new file mode 100644
index 0000000000000..5b1beee9c327a
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
@@ -0,0 +1,344 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+
+; Wasm passes struct {char} as an i8, so this test can check that varargs passing works on integers smaller than the slot size
+
+declare void @sink(...)
+
+
+define void @pass_nothing() {
+; OPT-LABEL: @pass_nothing(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink()
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_nothing(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_NOTHING_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+define void @pass_s1(i8 %x) {
+; OPT-LABEL: @pass_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x)
+ ret void
+}
+
+define void @pass_s2(i16 %x) {
+; OPT-LABEL: @pass_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP0]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i16 %x)
+ ret void
+}
+
+define void @pass_s3(i32 %x) {
+; OPT-LABEL: @pass_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x)
+ ret void
+}
+
+define void @pass_s4(i64 %x) {
+; OPT-LABEL: @pass_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP0]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i64 %x)
+ ret void
+}
+
+define void @pass_s5(<4 x i32> %x) {
+; OPT-LABEL: @pass_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x)
+ ret void
+}
+
+define void @pass_int_s1(i32 %i, i8 %x) {
+; OPT-LABEL: @pass_int_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP1]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i8 %x)
+ ret void
+}
+
+define void @pass_int_s2(i32 %i, i16 %x) {
+; OPT-LABEL: @pass_int_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x)
+ ret void
+}
+
+define void @pass_int_s3(i32 %i, i32 %x) {
+; OPT-LABEL: @pass_int_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i32 %x)
+ ret void
+}
+
+define void @pass_int_s4(i32 %i, i64 %x) {
+; OPT-LABEL: @pass_int_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i64 %x)
+ ret void
+}
+
+define void @pass_int_s5(i32 %i, <4 x i32> %x) {
+; OPT-LABEL: @pass_int_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], <4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP1]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, <4 x i32> %x)
+ ret void
+}
+
+define void @pass_asc(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_asc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X1:%.*]], i16 [[X2:%.*]], i32 [[X3:%.*]], i64 [[X4:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_asc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_ASC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 4
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 6
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP3]], align 8
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 8
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP4]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5)
+ ret void
+}
+
+define void @pass_dsc(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) {
+; OPT-LABEL: @pass_dsc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X0:%.*]], i64 [[X1:%.*]], i32 [[X2:%.*]], i16 [[X3:%.*]], i8 [[X4:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_dsc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_DSC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X0:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i64 [[X1:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i32 [[X2:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i16 [[X3:%.*]], ptr [[TMP3]], align 2
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 5
+; ABI-NEXT: store i8 [[X4:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4)
+ ret void
+}
+
+define void @pass_multiple(i32 %i, i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_multiple(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X2:%.*]], i64 [[X4:%.*]])
+; OPT-NEXT: tail call void (...) @sink(i32 [[I]], i8 [[X1:%.*]], i32 [[X3:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_multiple(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_MULTIPLE_VARARG:%.*]], align 16
+; ABI-NEXT: [[VARARG_BUFFER1:%.*]] = alloca [[PASS_MULTIPLE_VARARG_0:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP2]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I]], ptr [[TMP3]], align 4
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 3
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP5]], align 4
+; ABI-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 5
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP6]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x2, i64 %x4)
+ tail call void (...) @sink(i32 %i, i8 %x1, i32 %x3, <4 x i32> %x5)
+ ret void
+}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
index 0d134c7bdffb7..bcf2ea7510568 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
@@ -33,6 +33,7 @@ static_library("IPO") {
"DeadArgumentElimination.cpp",
"ElimAvailExtern.cpp",
"EmbedBitcodePass.cpp",
+ "ExpandVariadics.cpp",
"ExtractGV.cpp",
"ForceFunctionAttrs.cpp",
"FunctionAttrs.cpp",
More information about the llvm-commits
mailing list