[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 20:42:45 PDT 2024
https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/92850
Replace variadic functions with equivalent functions taking a va_list. This composes with optimisations like inlining to give zero cost variadics. Scheduled before the inliner at O1 and callable from a backend to provide a lowering implementation on IR.
Implementation is complete for webassembly. The frame constructed is the same as that done by the backend, a later patch will propose replacing the bespoke lowering there with a call to this pass. Most of the target-independent tests for this pass use wasm as a convenient triple.
Implemented for simple types on x64 and aarch64 to show that more complicated va_list constructs work as expected. Complex types left to a later patch to help keep the test quantity reasonable here. Likewise exceptions are left for a later patch.
Implementing nvptx and amdgpu in later patches as they're straightforward given this pass but may need discussion over what the proper calling convention should be. Both are expected to look similar to wasm. That would be consistent with ptx.
Implementing for targets that have va_arg selected in place for is mechanical - construct whatever frame layout is the complement of va_arg, set up a va_list iterator, add the tests.
The hope is that this is sufficiently useful to land as is. It'll remove any known calls for webassembly and the majority of known calls likely to exist in practice on x64 and aarch64. Having the pass in tree would be very helpful for reviews of amdgpu and nvptx variadic lowering, letting those focus on the what as opposed to the how.
>From 15061bfbc2dc06de5bac32628389386cadaa5632 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield at gmail.com>
Date: Tue, 21 May 2024 04:22:45 +0100
Subject: [PATCH] [IPO] Optimise variadic functions
---
clang/test/CodeGen/aarch64-ABI-align-packed.c | 3 +-
clang/test/CodeGen/voidptr-vaarg.c | 478 +++++++
.../CodeGenCXX/inline-then-fold-variadics.cpp | 128 ++
llvm/include/llvm/InitializePasses.h | 1 +
.../llvm/Transforms/IPO/ExpandVariadics.h | 43 +
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassBuilderPipelines.cpp | 4 +
llvm/lib/Passes/PassRegistry.def | 1 +
llvm/lib/Transforms/IPO/CMakeLists.txt | 1 +
llvm/lib/Transforms/IPO/ExpandVariadics.cpp | 1250 +++++++++++++++++
.../expand-variadic-call-apcs64-linux.ll | 289 ++++
.../WebAssembly/expand-variadic-call.ll | 483 +++++++
llvm/test/CodeGen/WebAssembly/vararg-frame.ll | 525 +++++++
.../X86/expand-variadic-call-x64-linux.ll | 244 ++++
llvm/test/Other/new-pm-defaults.ll | 1 +
.../Other/new-pm-thinlto-postlink-defaults.ll | 1 +
.../new-pm-thinlto-postlink-pgo-defaults.ll | 1 +
...-pm-thinlto-postlink-samplepgo-defaults.ll | 2 +-
.../Other/new-pm-thinlto-prelink-defaults.ll | 1 +
.../new-pm-thinlto-prelink-pgo-defaults.ll | 1 +
...w-pm-thinlto-prelink-samplepgo-defaults.ll | 1 +
.../expand-va-intrinsic-split-linkage.ll | 239 ++++
.../expand-va-intrinsic-split-simple.ll | 212 +++
.../ExpandVariadics/indirect-calls.ll | 58 +
.../Transforms/ExpandVariadics/intrinsics.ll | 117 ++
.../Transforms/ExpandVariadics/pass-byval.ll | 81 ++
.../ExpandVariadics/pass-integers.ll | 344 +++++
.../llvm/lib/Transforms/IPO/BUILD.gn | 1 +
28 files changed, 4509 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGen/voidptr-vaarg.c
create mode 100644 clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
create mode 100644 llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
create mode 100644 llvm/lib/Transforms/IPO/ExpandVariadics.cpp
create mode 100644 llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/vararg-frame.ll
create mode 100644 llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/intrinsics.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/pass-byval.ll
create mode 100644 llvm/test/Transforms/ExpandVariadics/pass-integers.ll
diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c
index 0349ebc8cc639..9ce051369f390 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c
@@ -1,5 +1,6 @@
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - %s -mllvm -expand-variadics-override=disable | FileCheck %s
+
#include <stdarg.h>
#include <arm_neon.h>
diff --git a/clang/test/CodeGen/voidptr-vaarg.c b/clang/test/CodeGen/voidptr-vaarg.c
new file mode 100644
index 0000000000000..d023ddf0fb5d2
--- /dev/null
+++ b/clang/test/CodeGen/voidptr-vaarg.c
@@ -0,0 +1,478 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Multiple targets use emitVoidPtrVAArg to lower va_arg instructions in clang
+// PPC is complicated, excluding from this case analysis
+// ForceRightAdjust is false for all non-PPC targets
+// AllowHigherAlign is only false for two Microsoft targets, both of which
+// pass most things by reference.
+//
+// Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
+// QualType ValueTy, bool IsIndirect,
+// TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign,
+// bool AllowHigherAlign, bool ForceRightAdjust =
+// false);
+//
+// Target IsIndirect SlotSize AllowHigher ForceRightAdjust
+// ARC false four true false
+// ARM varies four true false
+// Mips false 4 or 8 true false
+// RISCV varies register true false
+// PPC elided
+// LoongArch varies register true false
+// NVPTX WIP
+// AMDGPU WIP
+// X86_32 false four true false
+// X86_64 MS varies eight false false
+// CSKY false four true false
+// Webassembly varies four true false
+// AArch64 false eight true false
+// AArch64 MS false eight false false
+//
+// Webassembly passes indirectly iff it's an aggregate of multiple values
+// Choosing this as a representative architecture to check IR generation
+// partly because it has a relatively simple variadic calling convention.
+
+// Int, by itself and packed in structs
+// CHECK-LABEL: @raw_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int raw_int(__builtin_va_list list) { return __builtin_va_arg(list, int); }
+
+typedef struct {
+ int x;
+} one_int_t;
+
+// CHECK-LABEL: @one_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_INT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+one_int_t one_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_int_t);
+}
+
+typedef struct {
+ int x;
+ int y;
+} two_int_t;
+
+// CHECK-LABEL: @two_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT:%.*]], ptr align 4 [[TMP0]], i32 8, i1 false)
+// CHECK-NEXT: ret void
+//
+two_int_t two_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_int_t);
+}
+
+// Double, by itself and packed in structs
+// CHECK-LABEL: @raw_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+double raw_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, double);
+}
+
+typedef struct {
+ double x;
+} one_double_t;
+
+// CHECK-LABEL: @one_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_DOUBLE_T:%.*]], align 8
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[RETVAL]], ptr align 8 [[ARGP_CUR_ALIGNED]], i32 8, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_DOUBLE_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+one_double_t one_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_double_t);
+}
+
+typedef struct {
+ double x;
+ double y;
+} two_double_t;
+
+// CHECK-LABEL: @two_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[AGG_RESULT:%.*]], ptr align 8 [[TMP0]], i32 16, i1 false)
+// CHECK-NEXT: ret void
+//
+two_double_t two_double(__builtin_va_list list) {
+ return __builtin_va_arg(list, two_double_t);
+}
+
+// Scalar smaller than the slot size (C would promote a short to int)
+typedef struct {
+ char x;
+} one_char_t;
+
+// CHECK-LABEL: @one_char(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_CHAR_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_CHAR_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+one_char_t one_char(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_char_t);
+}
+
+typedef struct {
+ short x;
+} one_short_t;
+
+// CHECK-LABEL: @one_short(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_SHORT_T:%.*]], align 2
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 2, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_SHORT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[COERCE_DIVE]], align 2
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+one_short_t one_short(__builtin_va_list list) {
+ return __builtin_va_arg(list, one_short_t);
+}
+
+// Composite smaller than the slot size
+typedef struct {
+ _Alignas(2) char x;
+ char y;
+} char_pair_t;
+
+// CHECK-LABEL: @char_pair(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT:%.*]], ptr align 2 [[TMP0]], i32 2, i1 false)
+// CHECK-NEXT: ret void
+//
+char_pair_t char_pair(__builtin_va_list list) {
+ return __builtin_va_arg(list, char_pair_t);
+}
+
+// Empty struct
+typedef struct {
+} empty_t;
+
+// CHECK-LABEL: @empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 0, i1 false)
+// CHECK-NEXT: ret void
+//
+empty_t empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_t);
+}
+
+typedef struct {
+ empty_t x;
+ int y;
+} empty_int_t;
+
+// CHECK-LABEL: @empty_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+empty_int_t empty_int(__builtin_va_list list) {
+ return __builtin_va_arg(list, empty_int_t);
+}
+
+typedef struct {
+ int x;
+ empty_t y;
+} int_empty_t;
+
+// CHECK-LABEL: @int_empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT_EMPTY_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT_EMPTY_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int_empty_t int_empty(__builtin_va_list list) {
+ return __builtin_va_arg(list, int_empty_t);
+}
+
+// Need multiple va_arg instructions to check the postincrement
+// Using types that are passed directly as the indirect handling
+// is independent of the alignment handling in emitVoidPtrDirectVAArg.
+
+// CHECK-LABEL: @multiple_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT: ret void
+//
+void multiple_int(__builtin_va_list list, int *out0, int *out1, int *out2) {
+ *out0 = __builtin_va_arg(list, int);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, int);
+}
+
+// Scalars in structs are an easy way of specifying alignment from C
+// CHECK-LABEL: @increasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP0]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP1]], ptr align 4 [[ARGP_CUR1]], i32 2, i1 false)
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 7
+// CHECK-NEXT: [[ARGP_CUR5_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP4]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARGP_CUR5_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT: ret void
+//
+void increasing_alignment(__builtin_va_list list, one_char_t *out0,
+ one_short_t *out1, int *out2, double *out3) {
+ *out0 = __builtin_va_arg(list, one_char_t);
+ *out1 = __builtin_va_arg(list, one_short_t);
+ *out2 = __builtin_va_arg(list, int);
+ *out3 = __builtin_va_arg(list, double);
+}
+
+// CHECK-LABEL: @decreasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP1]], ptr [[TMP2]], align 8
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP5]], ptr align 4 [[ARGP_CUR3]], i32 2, i1 false)
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP6]], ptr align 4 [[ARGP_CUR5]], i32 1, i1 false)
+// CHECK-NEXT: ret void
+//
+void decreasing_alignment(__builtin_va_list list, double *out0, int *out1,
+ one_short_t *out2, one_char_t *out3) {
+ *out0 = __builtin_va_arg(list, double);
+ *out1 = __builtin_va_arg(list, int);
+ *out2 = __builtin_va_arg(list, one_short_t);
+ *out3 = __builtin_va_arg(list, one_char_t);
+}
+
+// Typical edge cases, none hit special handling in VAArg lowering.
+typedef struct {
+ int x[16];
+ double y[8];
+} large_value_t;
+
+// CHECK-LABEL: @large_value(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 8 [[TMP1]], i32 128, i1 false)
+// CHECK-NEXT: ret void
+//
+void large_value(__builtin_va_list list, large_value_t *out) {
+ *out = __builtin_va_arg(list, large_value_t);
+}
+
+typedef int v128_t __attribute__((__vector_size__(16), __aligned__(16)));
+// CHECK-LABEL: @vector(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -16)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 16
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARGP_CUR_ALIGNED]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+// CHECK-NEXT: ret void
+//
+void vector(__builtin_va_list list, v128_t *out) {
+ *out = __builtin_va_arg(list, v128_t);
+}
+
+typedef struct BF {
+ float not_an_i32[2];
+ int A : 1;
+ char B;
+ int C : 13;
+} BF;
+
+// CHECK-LABEL: @bitfield(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[TMP1]], i32 12, i1 false)
+// CHECK-NEXT: ret void
+//
+void bitfield(__builtin_va_list list, BF *out) {
+ *out = __builtin_va_arg(list, BF);
+}
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
new file mode 100644
index 0000000000000..cdc850a5b9d06
--- /dev/null
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -0,0 +1,128 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+
+// Simple calls to known variadic functions that are completely elided when optimisations are on
+// This is a functional check that the expand-variadic pass is consistent with clang's va_arg handling
+
+// -Wno-varargs avoids warning second argument to 'va_start' is not the last named parameter
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -Wno-varargs -O1 -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -Wno-varargs -O1 -emit-llvm -o - %s | FileCheck %s
+
+// x64 needs O2 to remove the extra SROA layer
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Wno-varargs -O2 -emit-llvm -o - %s | FileCheck %s
+
+
+#include <stdarg.h>
+#include <stdint.h>
+
+template <typename X, typename Y>
+static X first(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ X r = va_arg(va, X);
+ va_end(va);
+ return r;
+}
+
+template <typename X, typename Y>
+static Y second(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ va_arg(va, X);
+ Y r = va_arg(va, Y);
+ va_end(va);
+ return r;
+}
+
+
+extern "C"
+{
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_pair_i32(int x, int y)
+{
+ return first<int,int>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
+// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_pair_i32(int x, int y)
+{
+ return second<int,int>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
+// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_pair_f64(double x, double y)
+{
+ return first<double,double>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
+// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_pair_f64(double x, double y)
+{
+ return second<double,double>(x, y);
+}
+
+}
+
+
+
+extern "C"
+{
+// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_f64(int x, double y)
+{
+ return first<int,double>(x, y);
+}
+
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
+// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_i32_f64(int x, double y)
+{
+ return second<int,double>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
+// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_f64_i32(double x, int y)
+{
+ return first<double,int>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
+// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-LABEL:{{.}}:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_f64_i32(double x, int y)
+{
+ return second<double,int>(x, y);
+}
+}
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 9ba75d491c1c9..5da681781da97 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
+void initializeExpandVariadicsPass(PassRegistry &);
void initializeExpandVectorPredicationPass(PassRegistry &);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
new file mode 100644
index 0000000000000..02f3aa45ef1b4
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -0,0 +1,43 @@
+//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+class ModulePass;
+class OptimizationLevel;
+
+enum class ExpandVariadicsMode {
+ Unspecified, // Use the implementation defaults
+ Disable, // Disable the pass entirely
+ Optimize, // Optimise without changing ABI
+ Lowering, // Change variadic calling convention
+};
+
+class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
+ const ExpandVariadicsMode Mode;
+
+public:
+ // Operates under passed mode unless overridden on commandline
+ ExpandVariadicsPass(ExpandVariadicsMode Mode);
+
+ // Chooses disable or optimize based on optimization level
+ ExpandVariadicsPass(OptimizationLevel Level);
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+ModulePass *createExpandVariadicsPass(ExpandVariadicsMode);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 734ca4d5deec9..b1448cc17cf39 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -137,6 +137,7 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1892e16a06528..f12300f2b2c9b 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -48,6 +48,7 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -1195,6 +1196,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (EnablePGOForceFunctionAttrs && PGOOpt)
MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
+ // ExpandVariadics interacts well with the function inliner.
+ MPM.addPass(ExpandVariadicsPass(Level));
+
MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
if (EnableModuleInliner)
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f1..e31fd685abcfe 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
MODULE_PASS("dxil-upgrade", DXILUpgradePass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
+MODULE_PASS("expand-variadics", ExpandVariadicsPass(OptimizationLevel::O0))
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globalopt", GlobalOptPass())
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 5fbdbc3a014f9..92a9697720efd 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
DeadArgumentElimination.cpp
ElimAvailExtern.cpp
EmbedBitcodePass.cpp
+ ExpandVariadics.cpp
ExtractGV.cpp
ForceFunctionAttrs.cpp
FunctionAttrs.cpp
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
new file mode 100644
index 0000000000000..c04e16cb7bad7
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -0,0 +1,1250 @@
+//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The strategy is to turn the ... part of a variadic function into a va_list
+// and fix up the call sites. The majority of the pass is target independent.
+// The exceptions are the va_list type itself and the rules for where to store
+// variables in memory such that va_arg can iterate over them given a va_list.
+//
+// The majority of the plumbing is splitting the variadic function into a
+// single basic block that packs the variadic arguments into a va_list and
+// a second function that does the work of the original. That packing is
+// exactly what is done by va_start. Further, the transform from ... to va_list
+// replaced va_start with an operation to copy a va_list from the new argument,
+// which is exactly a va_copy. This is useful for reducing target-dependence.
+//
+// A va_list instance is a forward iterator, where the primary operation va_arg
+// is dereference-then-increment. This interface forces significant convergent
+// evolution between target specific implementations. The variation in runtime
+// data layout is limited to that representable by the iterator, parameterised
+// by the type passed to the va_arg instruction.
+//
+// Therefore the majority of the target specific subtlety is packing arguments
+// into a stack allocated buffer such that a va_list can be initialised with it
+// and the va_arg expansion for the target will find the arguments at runtime.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include <cstdio>
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
+ DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
+ cl::init(ExpandVariadicsMode::Unspecified),
+ cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
+ "Use the implementation defaults"),
+ clEnumValN(ExpandVariadicsMode::Disable, "disable",
+ "Disable the pass entirely"),
+ clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
+ "Optimise without changing ABI"),
+ clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
+ "Change variadic calling convention")));
+
+namespace {
+
+// Instances of this class encapsulate the target-dependant behaviour as a
+// function of triple. Implementing a new ABI is adding a case to the switch
+// in create(llvm::Triple) at the end of this file.
+class VariadicABIInfo {
+protected:
+ VariadicABIInfo() {}
+
+public:
+ static std::unique_ptr<VariadicABIInfo> create(llvm::Triple const &Triple);
+
+ // Whether a valist instance is passed by value or by address
+ // I.e. does it need to be alloca'ed and stored into, or can
+ // it be passed directly in a SSA register
+ virtual bool vaListPassedInSSARegister() = 0;
+
+ // The type of a va_list iterator object
+ virtual Type *vaListType(LLVMContext &Ctx) = 0;
+
+ // The type of a va_list as a function argument as lowered by C
+ virtual Type *vaListParameterType(Module &M) = 0;
+
+ // Initialize an allocated va_list object to point to an already
+ // initialized contiguous memory region.
+ // Return the value to pass as the va_list argument
+ virtual Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst *, Value * /*buffer*/) = 0;
+
+ struct VAArgSlotInfo
+ {
+ Align Align; // With respect to the call frame
+ bool Indirect; // Passed via a pointer
+ bool Unknown; // Cannot analyse this type, cannot transform the call
+ };
+ virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
+
+ // Targets implemented so far all have the same trivial lowering for these
+ bool vaEndIsNop() { return true; }
+ bool vaCopyIsMemcpy() { return true; }
+
+ virtual ~VariadicABIInfo() {}
+};
+
+// Module implements getFunction() which returns nullptr on missing declaration
+// and getOrInsertFunction which creates one when absent. Intrinsics.h only
+// implements getDeclaration which creates one when missing. Checking whether
+// an intrinsic exists thus inserts it in the module and it then needs to be
+// deleted again to clean up.
+// The right name for the two functions on intrinsics would match Module::,
+// but doing that in a single change would introduce nullptr dereferences
+// where currently there are none. The minimal collateral damage approach
+// would split the change over a release to help downstream branches. As it
+// is unclear what approach will be preferred, implementing the trivial
+// function here in the meantime to decouple from that discussion.
+Function *getPreexistingDeclaration(Module *M, Intrinsic::ID id,
+ ArrayRef<Type *> Tys = std::nullopt) {
+ auto *FT = Intrinsic::getType(M->getContext(), id, Tys);
+ return M->getFunction(Tys.empty() ? Intrinsic::getName(id)
+ : Intrinsic::getName(id, Tys, M, FT));
+}
+
+class ExpandVariadics : public ModulePass {
+
+ // The pass construction sets the default to optimize when called from middle
+ // end and lowering when called from the backend. The command line variable
+ // overrides that. This is useful for testing and debugging. It also allows
+ // building an applications with variadic functions wholly removed if one
+ // has sufficient control over the dependencies, e.g. a statically linked
+ // clang that has no variadic function calls remaining in the binary.
+ static ExpandVariadicsMode
+ withCommandLineOverride(ExpandVariadicsMode LLVMRequested) {
+ ExpandVariadicsMode UserRequested = ExpandVariadicsModeOption;
+ return (UserRequested == ExpandVariadicsMode::Unspecified) ? LLVMRequested
+ : UserRequested;
+ }
+
+public:
+ static char ID;
+ const ExpandVariadicsMode Mode;
+ std::unique_ptr<VariadicABIInfo> ABI;
+
+ ExpandVariadics(ExpandVariadicsMode Mode)
+ : ModulePass(ID), Mode(withCommandLineOverride(Mode)) {}
+ StringRef getPassName() const override { return "Expand variadic functions"; }
+
+ // Rewrite a variadic call site
+ bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
+ Function *NF);
+
+ Function *replaceAllUsesWithNewDeclaration(Module &M,
+ Function *OriginalFunction);
+ Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction);
+ Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+ Function *VariadicWrapper,
+ Function *FixedArityReplacement);
+
+ bool runOnModule(Module &M) override;
+ bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
+
+
+ bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
+
+ void memcpyVAListPointers(const DataLayout &DL, IRBuilder<> &Builder,
+ Value *Dst, Value *Src) {
+ auto &Ctx = Builder.getContext();
+ Type *VaListTy = ABI->vaListType(Ctx);
+ uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();
+ Builder.CreateMemCpyInline(Dst, {}, Src, {},
+ ConstantInt::get(Type::getInt32Ty(Ctx), Size));
+ }
+
+
+ template <Intrinsic::ID ID, typename InstructionType>
+ bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
+ PointerType *ArgType) {
+ bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
+ if (Function *Intrinsic = getPreexistingDeclaration(&M, ID, {ArgType})) {
+ for (User *U : llvm::make_early_inc_range(Intrinsic->users())) {
+ if (auto *I = dyn_cast<InstructionType>(U)) {
+ Changed |= expandVAIntrinsicCall(Builder, DL, I);
+ }
+ }
+ if (Intrinsic->use_empty())
+ Intrinsic->eraseFromParent();
+ }
+ return Changed;
+ }
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VAStartInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst);
+
+ bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+ VACopyInst *Inst);
+
+
+ FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
+ // The type of "FTy" with the ... removed and a va_list appended
+ SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+ ArgTypes.push_back(ABI->vaListParameterType(M));
+ bool IsVarArgs = false;
+ return FunctionType::get(FTy->getReturnType(), ArgTypes, IsVarArgs);
+ }
+
+ static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
+ AllocaInst *Alloced) {
+ Type *AllocaType = Alloced->getAllocatedType();
+ TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ uint64_t AsInt = AllocaTypeSize.getFixedValue();
+ return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
+ }
+
+ bool expansionApplicableToFunction(Module &M, Function *F) {
+ if (F->isIntrinsic() || !F->isVarArg() ||
+ F->hasFnAttribute(Attribute::Naked)) {
+ return false;
+ }
+
+ if (F->getCallingConv() != CallingConv::C)
+ return false;
+
+ if (!rewriteABI()) {
+ // e.g. can't replace a weak function unless changing the original symbol
+ if (GlobalValue::isInterposableLinkage(F->getLinkage())) {
+ return false;
+ }
+ }
+
+ if (!rewriteABI()) {
+ // If optimising, err on the side of leaving things alone
+ for (const Use &U : F->uses()) {
+ const auto *CB = dyn_cast<CallBase>(U.getUser());
+
+ if (!CB)
+ return false;
+
+ if (CB->isMustTailCall())
+ return false;
+
+ if (!CB->isCallee(&U) ||
+ CB->getFunctionType() != F->getFunctionType()) {
+ return false;
+ }
+ }
+ }
+
+ // Branch funnels look like variadic functions but aren't:
+ //
+ // define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) {
+ // musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr @vt1_1,
+ // ptr @vf1_1, ...) ret void
+ // }
+ //
+ // %1 = call i32 @__typeid_typeid1_0_branch_funnel(ptr nest %vtable, ptr
+ // %obj, i32 1)
+ //
+ // If this function contains a branch funnel intrinsic, don't transform it.
+
+ if (Function *Funnel =
+ getPreexistingDeclaration(&M, Intrinsic::icall_branch_funnel)) {
+ for (const User *U : Funnel->users()) {
+ if (auto *I = dyn_cast<CallBase>(U)) {
+ if (F == I->getFunction()) {
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+
+ bool callinstRewritable(CallBase *CB) {
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+ if (CI->isMustTailCall()) {
+ // Cannot expand musttail calls
+ return false;
+ }
+
+ return true;
+ }
+
+ if (isa<InvokeInst>(CB)) {
+ // Invoke not implemented in initial implementation of pass
+ return false;
+ }
+
+ // Other unimplemented derivative of CallBase
+ return false;
+ }
+
+ class ExpandedCallFrame {
+ // Helper for constructing an alloca instance containing the arguments bound
+ // to the variadic ... parameter, rearranged to allow indexing through a
+ // va_list iterator
+ enum { N = 4 };
+ SmallVector<Type *, N> FieldTypes;
+ enum Tag { Store, Memcpy, Padding };
+ SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
+
+ template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
+ FieldTypes.push_back(FieldType);
+ Source.push_back({V, Bytes, tag});
+ }
+
+ public:
+ void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }
+
+ void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
+ append<Memcpy>(T, V, Bytes);
+ }
+
+ void padding(LLVMContext &Ctx, uint64_t By) {
+ append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
+ }
+
+ size_t size() const { return FieldTypes.size(); }
+ bool empty() const { return FieldTypes.empty(); }
+
+ StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
+ const bool IsPacked = true;
+ return StructType::create(Ctx, FieldTypes,
+ (Twine(Name) + ".vararg").str(), IsPacked);
+ }
+
+ void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
+ AllocaInst *Alloced) {
+
+ StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());
+
+ for (size_t I = 0; I < size(); I++) {
+
+ auto [V, bytes, tag] = Source[I];
+
+ if (tag == Padding) {
+ assert(V == nullptr);
+ continue;
+ }
+
+ auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);
+
+ assert(V != nullptr);
+
+ if (tag == Store) {
+ Builder.CreateStore(V, Dst);
+ }
+
+ if (tag == Memcpy) {
+ Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
+ }
+ }
+ }
+ };
+};
+
+bool ExpandVariadics::runOnModule(Module &M) {
+ bool Changed = false;
+
+ if (Mode == ExpandVariadicsMode::Disable)
+ return Changed;
+
+ llvm::Triple Triple(M.getTargetTriple());
+
+ if (Triple.getArch() == Triple::UnknownArch) {
+ // If we don't know the triple, we can't lower varargs
+ return false;
+ }
+
+ ABI = VariadicABIInfo::create(Triple);
+ if (!ABI) {
+ if (Mode == ExpandVariadicsMode::Lowering) {
+ report_fatal_error(
+ "Requested variadic lowering is unimplemented on this target");
+ }
+ return Changed;
+ }
+
+ auto &Ctx = M.getContext();
+ IRBuilder<> Builder(Ctx);
+
+ // At pass input, va_start intrinsics only occur in variadic functions, as
+ // checked by the IR verifier.
+
+ // The lowering pass needs to run on all variadic functions.
+ // The optimise could run on only those that call va_start
+ // in exchange for additional book keeping to avoid transforming
+ // the same function multiple times when it contains multiple va_start.
+ // Leaving that compile time optimisation for a later patch.
+
+ for (Function &F : llvm::make_early_inc_range(M))
+ Changed |= runOnFunction(M, Builder, &F);
+
+ // After runOnFunction, all known calls to known variadic functions have been
+ // replaced. va_start intrinsics are presently (and invalidly!) only present
+ // in functions that used to be variadic and have now been replaced to take a
+ // va_list instead. If lowering as opposed to optimising, calls to unknown
+ // variadic functions have also been replaced.
+
+ unsigned Addrspace = 0; // Sufficient for current targets
+ {
+ PointerType *ArgType = PointerType::get(Ctx, Addrspace);
+ // expand vastart before vacopy as vastart may introduce a vacopy
+ Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(M, Builder,
+ ArgType);
+ Changed |=
+ expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(M, Builder, ArgType);
+ Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(M, Builder,
+ ArgType);
+ }
+
+ if (Mode != ExpandVariadicsMode::Lowering) {
+ return Changed; // Done
+ }
+
+ for (Function &F : llvm::make_early_inc_range(M)) {
+ if (F.isDeclaration())
+ continue;
+
+ // Now need to track down indirect calls. Can't find those
+ // by walking uses of variadic functions, need to crawl the instruction
+ // stream. Fortunately this is only necessary for the ABI rewrite case.
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
+ if (CallBase *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->isIndirectCall()) {
+ FunctionType *FTy = CB->getFunctionType();
+ if (FTy->isVarArg()) {
+ Changed |= expandCall(M, Builder, CB, FTy, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction) {
+ bool Changed = false;
+
+ // fprintf(stderr, "Called runOn: %s\n",
+ // OriginalFunction->getName().str().c_str());
+
+ // TODO: Check what F.hasExactDefinition() does
+
+ // This check might be too coarse - there are probably cases where
+ // splitting a function is bad but it's usable without splitting
+ if (!expansionApplicableToFunction(M, OriginalFunction))
+ return false;
+
+ // TODO: Leave "thunk" attribute functions alone?
+
+ // Need more tests than this. Weak etc. Some are in expansionApplicable.
+
+ if (OriginalFunction->isDeclaration()) {
+ if (Mode == ExpandVariadicsMode::Optimize) {
+ return false;
+ }
+ }
+
+ const bool OriginalFunctionIsDeclaration = OriginalFunction->isDeclaration();
+
+ // Declare a new function and redirect every use to that new function
+ Function *VariadicWrapper =
+ replaceAllUsesWithNewDeclaration(M, OriginalFunction);
+ assert(VariadicWrapper->isDeclaration());
+ assert(OriginalFunction->use_empty());
+
+ // Create a new function taking va_list containing the implementation of the original
+ Function *FixedArityReplacement =
+ deriveFixedArityReplacement(M, Builder, OriginalFunction);
+ assert(OriginalFunction->isDeclaration());
+ assert(FixedArityReplacement->isDeclaration() ==
+ OriginalFunctionIsDeclaration);
+ assert(VariadicWrapper->isDeclaration());
+
+ // Create a single block forwarding wrapper that turns a ... into a va_list
+ Function *VariadicWrapperDefine =
+ defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
+ assert(VariadicWrapperDefine == VariadicWrapper);
+ assert(!VariadicWrapper->isDeclaration());
+
+ // We now have:
+ // 1. the original function, now as a declaration with no uses
+ // 2. a variadic function that unconditionally calls a fixed arity replacement
+ // 3. a fixed arity function equivalent to the original function
+
+
+ // Replace known calls to the variadic with calls to the va_list equivalent
+ for (User *U : llvm::make_early_inc_range(VariadicWrapper->users())) {
+ if (CallBase *CB = dyn_cast<CallBase>(U)) {
+ Value *calledOperand = CB->getCalledOperand();
+ if (VariadicWrapper == calledOperand) {
+ Changed |=
+ expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
+ FixedArityReplacement);
+ }
+ }
+ }
+
+
+ Function *const ExternallyAccessible =
+ rewriteABI() ? FixedArityReplacement : VariadicWrapper;
+ Function *const InternalOnly =
+ rewriteABI() ? VariadicWrapper : FixedArityReplacement;
+
+
+ // care needed over other attributes, metadata etc
+
+ ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
+ ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
+ ExternallyAccessible->setComdat(OriginalFunction->getComdat());
+ ExternallyAccessible->takeName(OriginalFunction);
+
+ InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
+ InternalOnly->setLinkage(GlobalValue::InternalLinkage);
+
+ OriginalFunction->eraseFromParent();
+
+ InternalOnly->removeDeadConstantUsers();
+
+ if (rewriteABI()) {
+ // All known calls to the function have been removed by expandCall
+ // Resolve everything else by replace all uses with
+
+ VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
+
+ assert (VariadicWrapper->use_empty());
+ VariadicWrapper->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+Function *
+ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
+ Function *OriginalFunction) {
+ auto &Ctx = M.getContext();
+ Function &F = *OriginalFunction;
+ FunctionType *FTy = F.getFunctionType();
+ Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());
+
+ NF->setName(F.getName() + ".varargs");
+ NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+ // Could give it the same visibility/linkage as the original
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+
+ // might have a shorthand
+ AttrBuilder ParamAttrs(Ctx);
+ AttributeList Attrs = NF->getAttributes();
+ Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
+ NF->setAttributes(Attrs);
+
+ OriginalFunction->replaceAllUsesWith(NF);
+ return NF;
+}
+
+Function *
+ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+ Function *OriginalFunction) {
+ Function &F = *OriginalFunction;
+ // The purpose here is split the variadic function F into two functions
+ // One is a variadic function that bundles the passed argument into a va_list
+ // and passes it to the second function. The second function does whatever
+ // the original F does, except that it takes a va_list instead of the ...
+
+ assert(expansionApplicableToFunction(M, &F));
+
+ auto &Ctx = M.getContext();
+
+ // Returned value isDeclaration() is equal to F.isDeclaration()
+ // but that invariant is not satisfied throughout this function
+ const bool FunctionIsDefinition = !F.isDeclaration();
+
+ FunctionType *FTy = F.getFunctionType();
+ SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+ ArgTypes.push_back(ABI->vaListParameterType(M));
+
+ FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
+ Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+
+ // Note - same attribute handling as DeadArgumentElimination
+ NF->copyAttributesFrom(&F);
+ // NF->setComdat(F.getComdat()); // beware weak
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+ NF->setName(F.getName() + ".valist");
+ NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+
+ AttrBuilder ParamAttrs(Ctx);
+
+
+ AttributeList Attrs = NF->getAttributes();
+ Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
+ NF->setAttributes(Attrs);
+
+ // Splice the implementation into the new function with minimal changes
+ if (FunctionIsDefinition) {
+ NF->splice(NF->begin(), &F);
+
+ auto NewArg = NF->arg_begin();
+ for (Argument &Arg : F.args()) {
+ Arg.replaceAllUsesWith(NewArg);
+ NewArg->setName(Arg.getName()); // takeName without killing the old one
+ ++NewArg;
+ }
+ NewArg->setName("varargs");
+ }
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F.getAllMetadata(MDs);
+ for (auto [KindID, Node] : MDs)
+ NF->addMetadata(KindID, *Node);
+ F.clearMetadata();
+
+ return NF;
+}
+
+Function *
+ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+ Function *VariadicWrapper,
+ Function *FixedArityReplacement) {
+ auto &Ctx = Builder.getContext();
+ const DataLayout &DL = M.getDataLayout();
+ assert(VariadicWrapper->isDeclaration());
+ Function &F = *VariadicWrapper;
+
+ assert(F.isDeclaration());
+ Type *VaListTy = ABI->vaListType(Ctx);
+
+ auto *BB = BasicBlock::Create(Ctx, "entry", &F);
+ Builder.SetInsertPoint(BB);
+
+ AllocaInst *VaListInstance = Builder.CreateAlloca(VaListTy, nullptr, "va_list");
+
+ Builder.CreateLifetimeStart(VaListInstance, sizeOfAlloca(Ctx, DL, VaListInstance));
+
+ Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
+ {VaListInstance});
+
+ SmallVector<Value *> Args;
+ for (Argument &A : F.args())
+ Args.push_back(&A);
+
+ Args.push_back(VaListInstance);
+
+ CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);
+ Result->setTailCallKind(CallInst::TCK_Tail);
+
+ Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)}, {VaListInstance});
+ Builder.CreateLifetimeEnd(VaListInstance, sizeOfAlloca(Ctx, DL, VaListInstance));
+
+ if (Result->getType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(Result);
+
+ return VariadicWrapper;
+}
+
+bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
+ FunctionType *VarargFunctionType,
+ Function *NF) {
+ bool Changed = false;
+ const DataLayout &DL = M.getDataLayout();
+
+ if (!callinstRewritable(CB)) {
+ if (rewriteABI()) {
+ report_fatal_error("Cannot lower callbase instruction");
+ }
+ return Changed;
+ }
+
+ // This is tricky. The call instruction's function type might not match
+ // the type of the caller. When optimising, can leave it unchanged.
+ // Webassembly detects that inconsistency and repairs it.
+ FunctionType *FuncType = CB->getFunctionType();
+ if (FuncType != VarargFunctionType) {
+ if (!rewriteABI()) {
+ return Changed;
+ }
+ FuncType = VarargFunctionType;
+ }
+
+ auto &Ctx = CB->getContext();
+
+ Align MaxFieldAlign(1);
+
+ // The strategy is to allocate a call frame containing the variadic
+ // arguments laid out such that a target specific va_list can be initialized
+ // with it, such that target specific va_arg instructions will correctly
+ // iterate over it. This means getting the alignment right and sometimes
+ // embedding a pointer to the value instead of embedding the value itself.
+
+ Function *CBF = CB->getParent()->getParent();
+
+ ExpandedCallFrame Frame;
+
+ uint64_t CurrentOffset = 0;
+
+
+ for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
+ Value *ArgVal = CB->getArgOperand(I);
+ bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
+
+ // The call argument is either passed by value, or is a pointer passed byval
+ // The varargs frame either stores the value directly or a pointer to it
+
+ // The type of the value being passed, decoded from byval metadata if
+ // required
+ Type *const UnderlyingType =
+ IsByVal ? CB->getParamByValType(I) : ArgVal->getType();
+ const uint64_t UnderlyingSize =
+ DL.getTypeAllocSize(UnderlyingType).getFixedValue();
+
+ // The type to be written into the call frame
+ Type *FrameFieldType = UnderlyingType;
+
+ // The value to copy from when initialising the frame alloca
+ Value *SourceValue = ArgVal;
+
+ // TODO, slotInfo should probably return the right alignment even
+ // when returning true for indirect, somewhat messy
+ VariadicABIInfo::VAArgSlotInfo slotInfo = ABI->slotInfo(DL, UnderlyingType);
+
+#if 0
+ {
+ fprintf(stdout, "Underlying type for param %u (byval %u, indir %u)\n", I,
+ IsByVal, slotInfo.Indirect);
+ UnderlyingType->dump();
+ }
+#endif
+
+ if (slotInfo.Unknown) {
+ if (rewriteABI()) {
+ report_fatal_error("Variadic lowering unimplemented on given type");
+ } else {
+ return Changed;
+ }
+ }
+
+ if (slotInfo.Indirect) {
+ // The va_arg lowering loads through a pointer. Set up an alloca to aim
+ // that pointer at.
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+ Value *CallerCopy =
+ Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
+
+ Builder.SetInsertPoint(CB);
+ if (IsByVal)
+ Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
+ else
+ Builder.CreateStore(ArgVal, CallerCopy);
+
+ // Indirection now handled, pass the alloca ptr by value
+ FrameFieldType = DL.getAllocaPtrType(Ctx);
+ SourceValue = CallerCopy;
+ }
+
+ // Alignment of the value within the frame
+ // This probably needs to be controllable as a function of type
+ Align DataAlign = slotInfo.Align;
+
+ MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
+
+ uint64_t DataAlignV = DataAlign.value();
+ if (uint64_t Rem = CurrentOffset % DataAlignV) {
+ // Inject explicit padding to deal with alignment requirements
+ uint64_t Padding = DataAlignV - Rem;
+ Frame.padding(Ctx, Padding);
+ CurrentOffset += Padding;
+ }
+
+ if (slotInfo.Indirect) {
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ } else {
+ if (IsByVal) {
+ Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
+ } else {
+ Frame.store(Ctx, FrameFieldType, SourceValue);
+ }
+ }
+
+ CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
+ }
+
+ if (Frame.empty()) {
+ // Not passing any arguments, hopefully va_arg won't try to read any
+ // Creating a single byte frame containing nothing to point the va_list
+ // instance as that is less special-casey in the compiler and probably
+ // easier to interpret in a debugger.
+ Frame.padding(Ctx, 1);
+ }
+
+ StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
+
+ // The struct instance needs to be at least MaxFieldAlign for the alignment of
+ // the fields to be correct at runtime. Use the native stack alignment instead
+ // if that's greater as that tends to give better codegen.
+ // This is an awkward way to guess whether there is a known stack alignment
+ // without hitting an assert in DL.getStackAlignment, 1024 is an arbitrary
+ // number likely to be greater than the natural stack alignment.
+ // TODO: DL.getStackAlignment could return a MaybeAlign instead of assert
+ Align AllocaAlign = MaxFieldAlign;
+ if (DL.exceedsNaturalStackAlignment(Align(1024))) {
+ AllocaAlign = std::max(AllocaAlign, DL.getStackAlignment());
+ }
+
+ // Put the alloca to hold the variadic args in the entry basic block.
+ Builder.SetInsertPointPastAllocas(CBF);
+
+ // SetCurrentDebugLocation when the builder SetInsertPoint method does not
+ Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+
+ // The awkward construction here is to set the alignment on the instance
+ Changed = true;
+ AllocaInst *Alloced = Builder.Insert(
+ new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
+ "vararg_buffer");
+ assert(Alloced->getAllocatedType() == VarargsTy);
+
+ // Initialize the fields in the struct
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+ Frame.initializeStructAlloca(DL, Builder, Alloced);
+
+ const unsigned NumArgs = FuncType->getNumParams();
+ SmallVector<Value *> Args;
+ Args.assign(CB->arg_begin(), CB->arg_begin() + NumArgs);
+
+ // Initialize a va_list pointing to that struct and pass it as the last
+ // argument
+ AllocaInst *VaList = nullptr;
+ {
+ if (!ABI->vaListPassedInSSARegister()) {
+ Type *VaListTy = ABI->vaListType(Ctx);
+ Builder.SetInsertPointPastAllocas(CBF);
+ Builder.SetCurrentDebugLocation(
+ CB->getStableDebugLoc());
+ VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_list");
+ Builder.SetInsertPoint(CB);
+ Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
+ }
+ Args.push_back(ABI->initializeVAList(Ctx, Builder, VaList, Alloced));
+ }
+
+ // Attributes excluding any on the vararg arguments
+ AttributeList PAL = CB->getAttributes();
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL =
+ AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
+ }
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CB->getOperandBundlesAsDefs(OpBundles);
+
+ CallBase *NewCB = nullptr;
+
+ // Assert won't be true once InvokeInst is implemented in a later patch,
+ // current invariant is established by callinstRewritable() at the start
+ assert(isa<CallInst>(CB));
+
+ if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+
+ Value *Dst = NF ? NF : CI->getCalledOperand();
+ FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
+
+ NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI);
+
+ CallInst::TailCallKind TCK = CI->getTailCallKind();
+ assert(TCK != CallInst::TCK_MustTail); // guarded at prologue
+
+ // It doesn't get to be a tail call any more
+ // might want to guard this with arch, x64 and aarch64 document that
+ // varargs can't be tail called anyway
+ // Not totally convinced this is necessary but dead store elimination
+ // will discard the stores to the Alloca and pass uninitialized memory along
+ // instead when the function is marked tailcall
+ if (TCK == CallInst::TCK_Tail) {
+ TCK = CallInst::TCK_None;
+ }
+ CI->setTailCallKind(TCK);
+
+ } else {
+ llvm_unreachable("unreachable because callinstRewritable() returned false");
+ }
+
+ if (VaList)
+ Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
+
+ Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+
+ NewCB->setAttributes(PAL);
+ NewCB->takeName(CB);
+ NewCB->setCallingConv(CB->getCallingConv());
+
+ NewCB->setDebugLoc(DebugLoc());
+
+ // DeadArgElim and ArgPromotion copy exactly this metadata
+ NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+
+ CB->replaceAllUsesWith(NewCB);
+ CB->eraseFromParent();
+ return Changed;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VAStartInst *Inst) {
+ // TODO: Document or remove this action at a distance trickery
+ Function *ContainingFunction = Inst->getFunction();
+ if (ContainingFunction->isVarArg())
+ return false;
+
+ // The last argument is a vaListParameterType
+ Argument *PassedVaList =
+ ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
+
+ // va_start takes a pointer to a va_list, e.g. one on the stack
+ Value *VaStartArg = Inst->getArgList();
+
+ Builder.SetInsertPoint(Inst);
+
+ // If the va_list is itself a ptr, emitting a vacopy call requires an alloca
+ // which is then removed, simpler to build the store directly.
+ if (ABI->vaListPassedInSSARegister()) {
+ Builder.CreateStore(PassedVaList, VaStartArg);
+ } else {
+ // Otherwise emit a vacopy to pick up target-specific handling if any
+ auto &Ctx = Builder.getContext();
+ Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)}, {VaStartArg, PassedVaList});
+ }
+
+ Inst->eraseFromParent();
+ return true;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+ VAEndInst *Inst) {
+ assert(ABI->vaEndIsNop());
+ // A no-op on all the architectures implemented so far
+ Inst->eraseFromParent();
+ return true;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+ const DataLayout &DL,
+ VACopyInst *Inst) {
+ // TODO: This looks be wrong for non-struct va_list, check it using wasm
+ assert(ABI->vaCopyIsMemcpy());
+ Builder.SetInsertPoint(Inst);
+ memcpyVAListPointers(DL, Builder, Inst->getDest(), Inst->getSrc());
+ Inst->eraseFromParent();
+ return true;
+}
+
+template <uint32_t MinAlign, uint32_t MaxAlign> Align clampAlign(Align A) {
+ // Uses 0 as a sentinel to mean inactive
+ if (MinAlign && A < MinAlign)
+ A = Align(MinAlign);
+
+ if (MaxAlign && A > MaxAlign)
+ A = Align(MaxAlign);
+
+ return A;
+}
+
+ bool simpleScalarType(Type* Parameter)
+ {
+ // This is a stop-gap. The MVP can optimise x64 and aarch64 on linux
+ // for sufficiently simple calls.
+ if (Parameter->isDoubleTy()) return true;
+
+ if (Parameter->isIntegerTy(32)) return true;
+ if (Parameter->isIntegerTy(64)) return true;
+
+ if (Parameter->isPointerTy()) { return true; }
+
+ return false;
+ }
+
+
+struct AArch64 final : public VariadicABIInfo {
+ // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
+ // big endian, little endian ILP32 have their own triples
+
+ bool vaListPassedInSSARegister() override { return false; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+#if 0
+ typedef struct va_list {
+ void * stack; // next stack param
+ void * gr_top; // end of GP arg reg save area
+ void * vr_top; // end of FP/SIMD arg reg save area
+ int gr_offs; // offset from gr_top to next GP register arg
+ int vr_offs; // offset from vr_top to next FP/SIMD register arg
+ } va_list;
+#endif
+
+ auto I32 = Type::getInt32Ty(Ctx);
+ auto Ptr = PointerType::getUnqual(Ctx);
+
+ return StructType::get(Ctx, {Ptr, Ptr, Ptr, I32, I32});
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst *VaList, Value *VoidBuffer) override {
+ assert(VaList->getAllocatedType() == vaListType(Ctx));
+
+ Type *VaListTy = vaListType(Ctx);
+ Type *I32 = Type::getInt32Ty(Ctx);
+ Constant *Zero = ConstantInt::get(I32, 0);
+ Constant *Null = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
+
+ Value *Idxs[2] = {
+ ConstantInt::get(I32, 0),
+ nullptr,
+ };
+
+ Idxs[1] = ConstantInt::get(I32, 0);
+ Builder.CreateStore(
+ VoidBuffer, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "stack"));
+
+ // The general and vector regions are unused, given by the zero offsets,
+ // with nullptr a reasonable value to use for the pointer fields. That is
+ // all arguments are packed into the "stack" area, leaving the specialised
+ // two area unused.
+
+ Idxs[1] = ConstantInt::get(I32, 1);
+ Builder.CreateStore(
+ Null, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gr_top"));
+
+ Idxs[1] = ConstantInt::get(I32, 2);
+ Builder.CreateStore(
+ Null, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "vr_top"));
+
+ Idxs[1] = ConstantInt::get(I32, 3);
+ Builder.CreateStore(
+ Zero, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gr_offs"));
+
+ Idxs[1] = ConstantInt::get(I32, 4);
+ Builder.CreateStore(
+ Zero, Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "vr_offs"));
+
+ return VaList;
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ Align A = clampAlign<8, 0u>(DL.getABITypeAlign(Parameter));
+
+ bool Indirect = false; // true for some non-simple types on aarch64
+ bool Unknown = !simpleScalarType(Parameter);
+ return {A, Indirect, Unknown};
+ }
+};
+
+
+struct Wasm final : public VariadicABIInfo {
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+ return PointerType::getUnqual(Ctx);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst * /*va_list*/, Value *buffer) override {
+ return buffer;
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ LLVMContext &Ctx = Parameter->getContext();
+ Align A = clampAlign<4, 0>(DL.getABITypeAlign(Parameter));
+
+ // TODO, test empty record
+ if (auto s = dyn_cast<StructType>(Parameter)) {
+ if (s->getNumElements() > 1) {
+ return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true, false};
+ }
+ }
+
+ return {A, false, false};
+ }
+};
+
+
+struct X64SystemV final : public VariadicABIInfo {
+ bool vaListPassedInSSARegister() override { return false; }
+
+ Type *vaListType(LLVMContext &Ctx) override {
+ auto I32 = Type::getInt32Ty(Ctx);
+ auto Ptr = PointerType::getUnqual(Ctx);
+ return ArrayType::get(StructType::get(Ctx, {I32, I32, Ptr, Ptr}), 1);
+ }
+
+ Type *vaListParameterType(Module &M) override {
+ return PointerType::getUnqual(M.getContext());
+ }
+
+ Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+ AllocaInst *VaList, Value *VoidBuffer) override {
+ assert(VaList->getAllocatedType() == vaListType(Ctx));
+
+ Type *VaListTy = vaListType(Ctx);
+
+ Type *I32 = Type::getInt32Ty(Ctx);
+ Type *I64 = Type::getInt64Ty(Ctx);
+
+ Value *Idxs[3] = {
+ ConstantInt::get(I64, 0),
+ ConstantInt::get(I32, 0),
+ nullptr,
+ };
+
+ // The magic numbers here set up a va_list instance that has the general
+ // purpose and floating point regions empty, such that only the overflow
+ // area is used. That means a single contiguous struct can be the backing
+ // store and simpler code to optimise in the inlining case.
+
+ Idxs[2] = ConstantInt::get(I32, 0);
+ Builder.CreateStore(
+ ConstantInt::get(I32, 48),
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gp_offset"));
+
+ Idxs[2] = ConstantInt::get(I32, 1);
+ Builder.CreateStore(
+ ConstantInt::get(I32, 6 * 8 + 8 * 16),
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "fp_offset"));
+
+ Idxs[2] = ConstantInt::get(I32, 2);
+ Builder.CreateStore(
+ VoidBuffer,
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "overfow_arg_area"));
+
+ Idxs[2] = ConstantInt::get(I32, 3);
+ Builder.CreateStore(
+ ConstantPointerNull::get(PointerType::getUnqual(Ctx)),
+ Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "reg_save_area"));
+
+ return VaList;
+ }
+
+ VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+ // TODO: Make this comment look less scary
+ // SystemV X64 documented behaviour:
+ // Slots are at least eight byte aligned and at most 16 byte aligned.
+ // If the type needs more than sixteen byte alignment, it still only gets
+ // that much alignment on the stack.
+ // X64 behaviour in clang:
+ // Slots are at least eight byte aligned and at most naturally aligned
+ // This matches clang, not the ABI docs.
+
+ Align A = clampAlign<8, 0u>(DL.getABITypeAlign(Parameter));
+ bool Indirect = false;
+ bool Unknown = !simpleScalarType(Parameter);
+ return {A, Indirect, Unknown};
+ }
+};
+
+std::unique_ptr<VariadicABIInfo>
+VariadicABIInfo::create(llvm::Triple const &Triple) {
+
+ switch (Triple.getArch()) {
+
+ case Triple::aarch64: {
+ return std::make_unique<AArch64>();
+ }
+
+
+ case Triple::wasm32: {
+ return std::make_unique<Wasm>();
+ }
+
+ case Triple::x86_64: {
+ if (Triple.isOSLinux()) {
+ return std::make_unique<X64SystemV>();
+ }
+
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return {};
+}
+
+} // namespace
+
+char ExpandVariadics::ID = 0;
+
+INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
+ false)
+
+ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
+ return new ExpandVariadics(M);
+}
+
+PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
+ return ExpandVariadics(Mode).runOnModule(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
+ExpandVariadicsPass::ExpandVariadicsPass(OptimizationLevel Level)
+ : ExpandVariadicsPass(Level == OptimizationLevel::O0
+ ? ExpandVariadicsMode::Disable
+ : ExpandVariadicsMode::Optimize)
+{
+}
+
+ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}
diff --git a/llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll b/llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll
new file mode 100644
index 0000000000000..ceb226affc623
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+ at vararg_ptr = global ptr @vararg, align 8
+
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %cp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %cp, ptr %va, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %cp, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %cp = alloca %struct.__va_list, align 8
+ %byval-temp = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr %va)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %cp, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s, ptr %varargs, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca %struct.__va_list, align 8
+ %byval-temp = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %s1 = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp = alloca %struct.__va_list, align 8
+; CHECK-NEXT: %byval-temp1 = alloca %struct.__va_list, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s0, ptr %varargs, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s0, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s1, ptr %varargs, i32 32, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp1)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp1, ptr noundef nonnull align 8 dereferenceable(32) %s1, i64 32, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %byval-temp1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca %struct.__va_list, align 8
+ %s1 = alloca %struct.__va_list, align 8
+ %byval-temp = alloca %struct.__va_list, align 8
+ %byval-temp1 = alloca %struct.__va_list, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp, ptr noundef nonnull align 8 dereferenceable(32) %s0, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %byval-temp1)
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %byval-temp1, ptr noundef nonnull align 8 dereferenceable(32) %s1, i64 32, i1 false)
+ call void @valist(ptr noundef nonnull %byval-temp1)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %byval-temp1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %s0)
+ ret void
+}
+
+define void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare void @vararg(...)
+
+define void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca { ptr, ptr, ptr, i32, i32 }, align 8
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: %stack = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 0
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %stack, align 8
+; CHECK-NEXT: %gr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 1
+; CHECK-NEXT: store ptr null, ptr %gr_top, align 8
+; CHECK-NEXT: %vr_top = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 2
+; CHECK-NEXT: store ptr null, ptr %vr_top, align 8
+; CHECK-NEXT: %gr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 3
+; CHECK-NEXT: store i32 0, ptr %gr_offs, align 4
+; CHECK-NEXT: %vr_offs = getelementptr inbounds { ptr, ptr, ptr, i32, i32 }, ptr %va_list, i32 0, i32 4
+; CHECK-NEXT: store i32 0, ptr %vr_offs, align 4
+; CHECK-NEXT: call void %0(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 8
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+
diff --git a/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
new file mode 100644
index 0000000000000..93c4614782fd8
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll
@@ -0,0 +1,483 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_libcS.vararg = type <{ i32, ptr }>
+; CHECK: %libcS_i32.vararg = type <{ ptr, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+; CHECK: %fptr_libcS.vararg = type <{ ptr }>
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+ at vararg_ptr = hidden global ptr @vararg, align 4
+
+define hidden void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va.addr = alloca ptr, align 4
+; CHECK-NEXT: %cp = alloca ptr, align 4
+; CHECK-NEXT: store ptr %va, ptr %va.addr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %cp, ptr %va.addr, i32 4, i1 false)
+; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr)
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define hidden void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: store ptr %varargs, ptr %s, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define hidden void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca ptr, align 4
+; CHECK-NEXT: %s1 = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: store ptr %varargs, ptr %s0, align 4
+; CHECK-NEXT: %0 = load ptr, ptr %s0, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %0)
+; CHECK-NEXT: store ptr %varargs, ptr %s1, align 4
+; CHECK-NEXT: %1 = load ptr, ptr %s1, align 4
+; CHECK-NEXT: call void @valist(ptr noundef %1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define hidden void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare void @vararg(...)
+
+define hidden void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define hidden void @single_v4f32(<4 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v8f32(<8 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v16f32(<16 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x)
+ ret void
+}
+
+define hidden void @single_v32f32(<32 x float> noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x)
+ ret void
+}
+
+define hidden void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define hidden void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %y, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define hidden void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 36, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 68, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 256, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y)
+ ret void
+}
+
+define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @vararg(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 132, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y)
+ ret void
+}
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 16
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4
+; CHECK-NEXT: call void %0(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/vararg-frame.ll b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
new file mode 100644
index 0000000000000..03aeb32de2261
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll
@@ -0,0 +1,525 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+; Function Attrs: nounwind
+define void @pass_s0() {
+; CHECK-LABEL: pass_s0:
+; CHECK: .functype pass_s0 () -> ()
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: call sink
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+declare void @sink(...)
+
+; Function Attrs: nounwind
+define void @pass_s1(i8 %x) {
+; CHECK-LABEL: pass_s1:
+; CHECK: .functype pass_s1 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s2(i16 %x) {
+; CHECK-LABEL: pass_s2:
+; CHECK: .functype pass_s2 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i16 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s3(i32 %x) {
+; CHECK-LABEL: pass_s3:
+; CHECK: .functype pass_s3 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s4(i64 %x) {
+; CHECK-LABEL: pass_s4:
+; CHECK: .functype pass_s4 (i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i64 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_s5(<4 x i32> noundef %x) {
+; CHECK-LABEL: pass_s5:
+; CHECK: .functype pass_s5 (i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s0(i32 noundef %i) {
+; CHECK-LABEL: pass_int_s0:
+; CHECK: .functype pass_int_s0 (i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s1(i32 noundef %i, i8 %x) {
+; CHECK-LABEL: pass_int_s1:
+; CHECK: .functype pass_int_s1 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i8 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s2(i32 noundef %i, i16 %x) {
+; CHECK-LABEL: pass_int_s2:
+; CHECK: .functype pass_int_s2 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s3(i32 noundef %i, i32 %x) {
+; CHECK-LABEL: pass_int_s3:
+; CHECK: .functype pass_int_s3 (i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i32 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s4(i32 noundef %i, i64 %x) {
+; CHECK-LABEL: pass_int_s4:
+; CHECK: .functype pass_int_s4 (i32, i64) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.store 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i64 %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_int_s5(i32 noundef %i, <4 x i32> noundef %x) {
+; CHECK-LABEL: pass_int_s5:
+; CHECK: .functype pass_int_s5 (i32, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, <4 x i32> noundef %x)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_asc(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_asc:
+; CHECK: .functype pass_asc (i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 44
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 40
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 36
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> noundef %x5)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_dsc(<4 x i32> noundef %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) {
+; CHECK-LABEL: pass_dsc:
+; CHECK: .functype pass_dsc (i32, i32, i32, i32, i64, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 12
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(<4 x i32> noundef %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @pass_multiple(i32 noundef %i, i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> noundef %x5) {
+; CHECK-LABEL: pass_multiple:
+; CHECK: .functype pass_multiple (i32, i32, i32, i32, i64, i32, i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.store 40
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.store 36
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 32
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 28
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 24
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 20
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i32.store 8
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.store 4
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.store 0
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: call sink
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32.const 48
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+ tail call void (...) @sink(i32 noundef %i, i16 %x2, i64 %x4)
+ tail call void (...) @sink(i32 noundef %i, i8 %x1, i32 %x3, <4 x i32> noundef %x5)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
new file mode 100644
index 0000000000000..ed45631025e50
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check the variables are lowered to the locations this target expects
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %fptr_single_i32.vararg = type <{ i32 }>
+
+%struct.__va_list_tag = type { i32, i32, ptr, ptr }
+
+ at vararg_ptr = global ptr @vararg, align 8
+
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %cp = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %cp, ptr %va, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %cp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp)
+; CHECK-NEXT: ret void
+;
+entry:
+ %cp = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr %va)
+ call void @valist(ptr noundef nonnull %cp)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp)
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+define void @start_once(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s, ptr %varargs, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %s)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ call void @valist(ptr noundef nonnull %s)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s)
+ ret void
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+define void @start_twice(...) {
+; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s0 = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: %s1 = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s0)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s0, ptr %varargs, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %s0)
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr %s1, ptr %varargs, i32 24, i1 false)
+; CHECK-NEXT: call void @valist(ptr noundef nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s1)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s0)
+; CHECK-NEXT: ret void
+;
+entry:
+ %s0 = alloca [1 x %struct.__va_list_tag], align 16
+ %s1 = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ call void @valist(ptr noundef nonnull %s0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ call void @valist(ptr noundef nonnull %s1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s0)
+ ret void
+}
+
+define void @single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x)
+ ret void
+}
+
+declare void @vararg(...)
+
+define void @single_double(double noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x)
+ ret void
+}
+
+define void @i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y)
+ ret void
+}
+
+define void @double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @vararg(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y)
+ ret void
+}
+
+define void @fptr_single_i32(i32 noundef %x) {
+; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void %0(ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr %va_list)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer)
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 8
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 51fb93daa4dfa..54c593c98ab66 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -125,6 +125,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 064362eabbf83..fa9c0cc7708e6 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -61,6 +61,7 @@
; CHECK-O-NEXT: Running analysis: TypeBasedAA
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 19a44867e434a..8bd372f4c0691 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -50,6 +50,7 @@
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index ac80a31d8fd4b..dd99340e60c0d 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -58,7 +58,7 @@
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
-
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 6486639e07b49..d2a53cea68c37 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -92,6 +92,7 @@
; CHECK-O-NEXT: Running analysis: TypeBasedAA
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index 09f9f0f48badd..cc110b01fb244 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -83,6 +83,7 @@
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 47bdbfd2d357d..d2c547e04085e 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -63,6 +63,7 @@
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
+; CHECK-O-NEXT: Running pass: ExpandVariadicsPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
new file mode 100644
index 0000000000000..8a841daef832c
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
@@ -0,0 +1,239 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+
+; Split variadic functions into two functions:
+; - one equivalent to the original, same symbol etc
+; - one implementing the contents of the original but taking a valist
+; IR here is applicable to any target that uses a ptr for valist
+;
+; Defines a function with each linkage (in the order of the llvm documentation).
+; If split applies it does the same transform to each.
+; Whether split applies depends on whether the ABI is being changed or not - e.g. a weak
+; function is not normally useful to split as the contents cannot be called from elsewhere.
+; If the ABI is being rewritten then the function is still converted. Call sites tested elsewhere.
+
+; Update test checks doesn't emit checks for declares
+
+declare void @sink_valist(ptr)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare void @decl_simple(...)
+define void @defn_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for private
+define private void @defn_private_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_private_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_private_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_private_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for internal
+define internal void @defn_internal_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_internal_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_internal_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_internal_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for available_externally
+define available_externally void @available_externally_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@available_externally_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @available_externally_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@available_externally_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for linkonce
+define linkonce void @defn_linkonce_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak
+define weak void @defn_weak_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_simple(...) {
+; OPT-NEXT: %va = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va)
+; OPT-NEXT: call void @sink_valist(ptr %va)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; common is not applicable to functions
+; appending is not applicable to functions
+
+declare extern_weak void @decl_extern_weak_simple(...)
+; no define for extern_weak
+
+; no declare for linkonce_odr
+define linkonce_odr void @defn_linkonce_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_linkonce_odr_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+; no declare for weak_odr
+define weak_odr void @defn_weak_odr_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_weak_odr_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_weak_odr_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_weak_odr_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+declare external void @decl_external_simple(...)
+define external void @defn_external_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_external_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: tail call void @defn_external_simple.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_external_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+ %va = alloca ptr, align 4
+ call void @llvm.va_start(ptr %va)
+ call void @sink_valist(ptr %va)
+ call void @llvm.va_end(ptr %va)
+ ret void
+}
+
+
+
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
new file mode 100644
index 0000000000000..7a40e1290a8ba
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+
+
+; Examples are variadic functions that return the first or the second of an int and a double
+; Split the functions into an internal equivalent that takes a va_list and a ABI preserving wrapper
+
+define i32 @variadic_int_double_get_firstz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: %0 = tail call i32 @variadic_int_double_get_firstz.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret i32 %0
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: store ptr %argp.next, ptr %va, align 4
+; ABI-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; ABI-NEXT: ret i32 %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ store ptr %argp.next, ptr %va, align 4
+ %0 = load i32, ptr %argp.cur, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret i32 %0
+}
+
+; CHECK-LABEL: define i32 @variadic_int_double_get_firstz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call i32 @variadic_int_double_get_firstz.valist(ptr %va_list)
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal i32 @variadic_int_double_get_firstz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: store ptr %argp.next, ptr %va, align 4
+; CHECK-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; CHECK-NEXT: ret i32 %0
+; CHECK-NEXT: }
+
+define double @variadic_int_double_get_secondz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_list = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_list)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; OPT-NEXT: %0 = tail call double @variadic_int_double_get_secondz.valist(ptr %va_list)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_list)
+; OPT-NEXT: ret double %0
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; ABI-NEXT: store ptr %argp.next2, ptr %va, align 4
+; ABI-NEXT: %0 = load double, ptr %argp.next, align 4
+; ABI-NEXT: ret double %0
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.va_start.p0(ptr nonnull %va)
+ %argp.cur = load ptr, ptr %va, align 4
+ %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+ %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+ store ptr %argp.next2, ptr %va, align 4
+ %0 = load double, ptr %argp.next, align 4
+ call void @llvm.va_end.p0(ptr %va)
+ ret double %0
+}
+
+; CHECK-LABEL: define double @variadic_int_double_get_secondz(...) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va_list = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.va_start.p0(ptr %va_list)
+; CHECK-NEXT: %0 = tail call double @variadic_int_double_get_secondz.valist(ptr %va_list)
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal double @variadic_int_double_get_secondz.valist(ptr noalias %varargs) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: store ptr %varargs, ptr %va, align 4
+; CHECK-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; CHECK-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; CHECK-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; CHECK-NEXT: store ptr %argp.next2, ptr %va, align 4
+; CHECK-NEXT: %0 = load double, ptr %argp.next, align 4
+; CHECK-NEXT: ret double %0
+; CHECK-NEXT: }
+
+
+; CHECK-LABEL: @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = icmp eq i32 %call, %x
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = icmp eq i32 %call, %x
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call i32 @variadic_int_double_get_firstz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = icmp eq i32 %call, %x
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call i32 (...) @variadic_int_double_get_firstz(i32 %x, double %y)
+ %cmp.i = icmp eq i32 %call, %x
+ ret i1 %cmp.i
+}
+
+; CHECK-LABEL: @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer)
+; CHECK-NEXT: %cmp.i = fcmp oeq double %call, %y
+; CHECK-NEXT: ret i1 %cmp.i
+; CHECK-NEXT: }
+
+define zeroext i1 @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = fcmp oeq double %call, %y
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call double @variadic_int_double_get_secondz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = fcmp oeq double %call, %y
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+ %call = call double (...) @variadic_int_double_get_secondz(i32 %x, double %y)
+ %cmp.i = fcmp oeq double %call, %y
+ ret i1 %cmp.i
+}
+
+; Declaration unchanged
+; CHECK: declare void @variadic_without_callers(...)
+declare void @variadic_without_callers(...)
+
+declare void @llvm.va_start.p0(ptr)
+declare void @llvm.va_end.p0(ptr)
diff --git a/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
new file mode 100644
index 0000000000000..0310adf936d84
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+
+declare void @vararg(...)
+ at vararg_ptr = hidden global ptr @vararg, align 4
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define hidden void @fptr_single_i32(i32 noundef %x) {
+; OPT-LABEL: @fptr_single_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](i32 noundef [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_single_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_SINGLE_I32_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_SINGLE_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(i32 noundef %x)
+ ret void
+}
+
+define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) {
+; OPT-LABEL: @fptr_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; OPT-NEXT: tail call void (...) [[TMP0]](ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @fptr_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ %0 = load volatile ptr, ptr @vararg_ptr, align 4
+ tail call void (...) %0(ptr noundef nonnull byval(%struct.libcS) align 8 %x)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/intrinsics.ll b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
new file mode 100644
index 0000000000000..172a6ecc900e0
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK,OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=CHECK,ABI
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_copy.p0(ptr, ptr)
+
+declare void @valist(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)
+
+
+define void @start_once(...) {
+; OPT-LABEL: @start_once(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_LIST:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_LIST]])
+; OPT-NEXT: tail call void @start_once.valist(ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_once(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s)
+ call void @llvm.va_start.p0(ptr nonnull %s)
+ %0 = load ptr, ptr %s, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s)
+ ret void
+}
+
+
+define void @start_twice(...) {
+; OPT-LABEL: @start_twice(
+; OPT-NEXT: entry:
+; OPT-NEXT: [[VA_LIST:%.*]] = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_LIST]])
+; OPT-NEXT: tail call void @start_twice.valist(ptr [[VA_LIST]])
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_LIST]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @start_twice(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[S0:%.*]] = alloca ptr, align 4
+; ABI-NEXT: [[S1:%.*]] = alloca ptr, align 4
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S0]], align 4
+; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S0]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP0]])
+; ABI-NEXT: store ptr [[VARARGS]], ptr [[S1]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S1]], align 4
+; ABI-NEXT: call void @valist(ptr noundef [[TMP1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S0]])
+; ABI-NEXT: ret void
+;
+entry:
+ %s0 = alloca ptr, align 4
+ %s1 = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0)
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.va_start.p0(ptr nonnull %s0)
+ %0 = load ptr, ptr %s0, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.va_end.p0(ptr %s0)
+ call void @llvm.va_start.p0(ptr nonnull %s1)
+ %1 = load ptr, ptr %s1, align 4
+ call void @valist(ptr noundef %1)
+ call void @llvm.va_end.p0(ptr %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0)
+ ret void
+}
+
+define void @copy(ptr noundef %va) {
+; CHECK-LABEL: @copy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VA_ADDR:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: [[CP:%.*]] = alloca ptr, align 4
+; CHECK-NEXT: store ptr [[VA:%.*]], ptr [[VA_ADDR]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr [[CP]], ptr [[VA_ADDR]], i32 4, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CP]], align 4
+; CHECK-NEXT: call void @valist(ptr noundef [[TMP0]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[CP]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %va.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %va, ptr %va.addr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp)
+ call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr)
+ %0 = load ptr, ptr %cp, align 4
+ call void @valist(ptr noundef %0)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-byval.ll b/llvm/test/Transforms/ExpandVariadics/pass-byval.ll
new file mode 100644
index 0000000000000..b21f83fd75587
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-byval.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+
+
+; CHECK: @sink
+declare void @sink(...)
+
+
+define void @pass_byval(ptr byval(i32) %b) {
+; OPT-LABEL: @pass_byval(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval(i32) [[B:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_byval(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYVAL_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP0]], ptr [[B:%.*]], i64 4, i1 false)
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(i32) %b)
+ ret void
+}
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define void @i32_libcS(i32 %x, ptr noundef byval(%struct.libcS) align 8 %y) {
+; OPT-LABEL: @i32_libcS(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @i32_libcS(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[Y:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x, ptr byval(%struct.libcS) align 8 %y)
+ ret void
+}
+
+define void @libcS_i32(ptr byval(%struct.libcS) align 8 %x, i32 %y) {
+; OPT-LABEL: @libcS_i32(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @libcS_i32(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false)
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(ptr byval(%struct.libcS) align 8 %x, i32 %y)
+ ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/pass-integers.ll b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
new file mode 100644
index 0000000000000..5b1beee9c327a
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll
@@ -0,0 +1,344 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI
+
+; Wasm passes struct {char} as an i8 so can check the varargs passing works on integers smaller than the slot size
+
+declare void @sink(...)
+
+
+define void @pass_nothing() {
+; OPT-LABEL: @pass_nothing(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink()
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_nothing(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_NOTHING_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink()
+ ret void
+}
+
+define void @pass_s1(i8 %x) {
+; OPT-LABEL: @pass_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x)
+ ret void
+}
+
+define void @pass_s2(i16 %x) {
+; OPT-LABEL: @pass_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP0]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i16 %x)
+ ret void
+}
+
+define void @pass_s3(i32 %x) {
+; OPT-LABEL: @pass_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %x)
+ ret void
+}
+
+define void @pass_s4(i64 %x) {
+; OPT-LABEL: @pass_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP0]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i64 %x)
+ ret void
+}
+
+define void @pass_s5(<4 x i32> %x) {
+; OPT-LABEL: @pass_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x)
+ ret void
+}
+
+define void @pass_int_s1(i32 %i, i8 %x) {
+; OPT-LABEL: @pass_int_s1(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i8 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s1(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S1_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP1]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 5, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i8 %x)
+ ret void
+}
+
+define void @pass_int_s2(i32 %i, i16 %x) {
+; OPT-LABEL: @pass_int_s2(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s2(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S2_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 6, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x)
+ ret void
+}
+
+define void @pass_int_s3(i32 %i, i32 %x) {
+; OPT-LABEL: @pass_int_s3(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i32 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s3(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S3_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i32 %x)
+ ret void
+}
+
+define void @pass_int_s4(i32 %i, i64 %x) {
+; OPT-LABEL: @pass_int_s4(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i64 [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s4(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S4_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i64 %x)
+ ret void
+}
+
+define void @pass_int_s5(i32 %i, <4 x i32> %x) {
+; OPT-LABEL: @pass_int_s5(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], <4 x i32> [[X:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_int_s5(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S5_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP1]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, <4 x i32> %x)
+ ret void
+}
+
+define void @pass_asc(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_asc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i8 [[X1:%.*]], i16 [[X2:%.*]], i32 [[X3:%.*]], i64 [[X4:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_asc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_ASC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP0]], align 1
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 4
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 6
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP3]], align 8
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 8
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP4]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 48, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5)
+ ret void
+}
+
+define void @pass_dsc(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) {
+; OPT-LABEL: @pass_dsc(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X0:%.*]], i64 [[X1:%.*]], i32 [[X2:%.*]], i16 [[X3:%.*]], i8 [[X4:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_dsc(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_DSC_VARARG:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store <4 x i32> [[X0:%.*]], ptr [[TMP0]], align 16
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i64 [[X1:%.*]], ptr [[TMP1]], align 8
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; ABI-NEXT: store i32 [[X2:%.*]], ptr [[TMP2]], align 4
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i16 [[X3:%.*]], ptr [[TMP3]], align 2
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 5
+; ABI-NEXT: store i8 [[X4:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 33, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4)
+ ret void
+}
+
+define void @pass_multiple(i32 %i, i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) {
+; OPT-LABEL: @pass_multiple(
+; OPT-NEXT: entry:
+; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X2:%.*]], i64 [[X4:%.*]])
+; OPT-NEXT: tail call void (...) @sink(i32 [[I]], i8 [[X1:%.*]], i32 [[X3:%.*]], <4 x i32> [[X5:%.*]])
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: @pass_multiple(
+; ABI-NEXT: entry:
+; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_MULTIPLE_VARARG:%.*]], align 16
+; ABI-NEXT: [[VARARG_BUFFER1:%.*]] = alloca [[PASS_MULTIPLE_VARARG_0:%.*]], align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4
+; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1
+; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2
+; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP2]], align 8
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 0
+; ABI-NEXT: store i32 [[I]], ptr [[TMP3]], align 4
+; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 1
+; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP4]], align 1
+; ABI-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 3
+; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP5]], align 4
+; ABI-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 5
+; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP6]], align 16
+; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER1]])
+; ABI-NEXT: ret void
+;
+entry:
+ tail call void (...) @sink(i32 %i, i16 %x2, i64 %x4)
+ tail call void (...) @sink(i32 %i, i8 %x1, i32 %x3, <4 x i32> %x5)
+ ret void
+}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
index 0d134c7bdffb7..bcf2ea7510568 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
@@ -33,6 +33,7 @@ static_library("IPO") {
"DeadArgumentElimination.cpp",
"ElimAvailExtern.cpp",
"EmbedBitcodePass.cpp",
+ "ExpandVariadics.cpp",
"ExtractGV.cpp",
"ForceFunctionAttrs.cpp",
"FunctionAttrs.cpp",
More information about the llvm-commits
mailing list