[clang] [llvm] [transforms] Inline simple variadic functions (PR #81058)
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 7 17:13:25 PST 2024
https://github.com/JonChesterfield updated https://github.com/llvm/llvm-project/pull/81058
>From 54b09d56f64795ed8c6966e54f169110a0d4fb6e Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield at gmail.com>
Date: Wed, 7 Feb 2024 13:47:19 +0000
Subject: [PATCH] [transforms] Inline simple variadic functions
---
clang/test/CodeGen/expand-variadic-call.c | 273 +++++++
clang/test/CodeGen/variadic-wrapper-removal.c | 86 +++
.../CodeGenCXX/inline-then-fold-variadics.cpp | 117 +++
llvm/include/llvm/CodeGen/Passes.h | 4 +
llvm/include/llvm/InitializePasses.h | 1 +
.../llvm/Transforms/IPO/ExpandVariadics.h | 17 +
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassRegistry.def | 1 +
llvm/lib/Transforms/IPO/CMakeLists.txt | 1 +
llvm/lib/Transforms/IPO/ExpandVariadics.cpp | 715 ++++++++++++++++++
.../X86/expand-variadic-call-i386-darwin.ll | 385 ++++++++++
.../X86/expand-variadic-call-i386-linux.ll | 385 ++++++++++
.../X86/expand-variadic-call-i686-msvc.ll | 402 ++++++++++
.../X86/expand-variadic-call-x64-darwin.ll | 589 +++++++++++++++
.../X86/expand-variadic-call-x64-linux.ll | 589 +++++++++++++++
.../llvm/lib/Transforms/IPO/BUILD.gn | 1 +
16 files changed, 3567 insertions(+)
create mode 100644 clang/test/CodeGen/expand-variadic-call.c
create mode 100644 clang/test/CodeGen/variadic-wrapper-removal.c
create mode 100644 clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
create mode 100644 llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
create mode 100644 llvm/lib/Transforms/IPO/ExpandVariadics.cpp
create mode 100644 llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll
create mode 100644 llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll
create mode 100644 llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll
create mode 100644 llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll
create mode 100644 llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
diff --git a/clang/test/CodeGen/expand-variadic-call.c b/clang/test/CodeGen/expand-variadic-call.c
new file mode 100644
index 00000000000000..fa2b984bec08a5
--- /dev/null
+++ b/clang/test/CodeGen/expand-variadic-call.c
@@ -0,0 +1,273 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-cpu x86-64-v4 -std=c23 -O1 -ffreestanding -emit-llvm -o - %s | FileCheck %s
+
+// This test sanity checks calling a variadic function with the expansion transform disabled.
+// The IR test cases {arch}/expand-variadic-call-*.ll correspond to IR generated from this test case.
+
+typedef __builtin_va_list va_list;
+#define va_copy(dest, src) __builtin_va_copy(dest, src)
+#define va_start(ap, ...) __builtin_va_start(ap, 0)
+#define va_end(ap) __builtin_va_end(ap)
+#define va_arg(ap, type) __builtin_va_arg(ap, type)
+
+// 32 bit x86 alignment uses getTypeStackAlign for special cases
+// Whitebox testing.
+// Needs a type >= 16 which is either a simd or a struct containing a simd
+// darwinvectorabi should force 4 bytes
+// linux vectors with align 16/32/64 return that alignment
+
+
+void wrapped(va_list);
+
+// CHECK-LABEL: @codegen_for_copy(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CP:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[CP]]) #[[ATTR7:[0-9]+]]
+// CHECK-NEXT: call void @llvm.va_copy(ptr nonnull [[CP]], ptr [[X:%.*]])
+// CHECK-NEXT: call void @wrapped(ptr noundef nonnull [[CP]]) #[[ATTR8:[0-9]+]]
+// CHECK-NEXT: call void @llvm.va_end(ptr [[CP]])
+// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[CP]]) #[[ATTR7]]
+// CHECK-NEXT: ret void
+//
+void codegen_for_copy(va_list x)
+{
+ va_list cp;
+ va_copy(cp, x);
+ wrapped(cp);
+ va_end(cp);
+}
+
+
+// CHECK-LABEL: @vararg(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[VA]]) #[[ATTR7]]
+// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VA]])
+// CHECK-NEXT: call void @wrapped(ptr noundef nonnull [[VA]]) #[[ATTR8]]
+// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[VA]]) #[[ATTR7]]
+// CHECK-NEXT: ret void
+//
+ void vararg(...)
+{
+ va_list va;
+ __builtin_va_start(va, 0);
+ wrapped(va);
+ va_end(va);
+}
+
+// vectors with alignment 16/32/64 are natively aligned on linux x86
+// v32f32 would be a m1024 type, larger than x64 defines at time of writing
+typedef int i32;
+typedef float v4f32 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef float v8f32 __attribute__((__vector_size__(32), __aligned__(32)));
+typedef float v16f32 __attribute__((__vector_size__(64), __aligned__(64)));
+typedef float v32f32 __attribute__((__vector_size__(128), __aligned__(128)));
+
+
+// Pass a single value to wrapped() via vararg(...)
+// CHECK-LABEL: @single_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]]) #[[ATTR9:[0-9]+]]
+// CHECK-NEXT: ret void
+//
+void single_i32(i32 x)
+{
+ vararg(x);
+}
+
+// CHECK-LABEL: @single_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(double noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void single_double(double x)
+{
+ vararg(x);
+}
+
+// CHECK-LABEL: @single_v4f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(<4 x float> noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void single_v4f32(v4f32 x)
+{
+ vararg(x);
+}
+
+// CHECK-LABEL: @single_v8f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(<8 x float> noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void single_v8f32(v8f32 x)
+{
+ vararg(x);
+}
+
+// CHECK-LABEL: @single_v16f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(<16 x float> noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void single_v16f32(v16f32 x)
+{
+ vararg(x);
+}
+
+// CHECK-LABEL: @single_v32f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128
+// CHECK-NEXT: [[X:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store <32 x float> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void single_v32f32(v32f32 x)
+{
+ vararg(x);
+}
+
+
+
+// CHECK-LABEL: @i32_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void i32_double(i32 x, double y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @double_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void double_i32(double x, i32 y)
+{
+ vararg(x, y);
+}
+
+
+// A struct used by libc variadic tests
+
+typedef struct {
+ char c;
+ short s;
+ int i;
+ long l;
+ float f;
+ double d;
+} libcS;
+
+// CHECK-LABEL: @i32_libcS(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void i32_libcS(i32 x, libcS y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @libcS_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void libcS_i32(libcS x, i32 y)
+{
+ vararg(x, y);
+}
+
+
+// CHECK-LABEL: @i32_v4f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void i32_v4f32(i32 x, v4f32 y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @v4f32_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(<4 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void v4f32_i32(v4f32 x, i32 y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @i32_v8f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], <8 x float> noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void i32_v8f32(i32 x, v8f32 y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @v8f32_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(<8 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void v8f32_i32(v8f32 x, i32 y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @i32_v16f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], <16 x float> noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void i32_v16f32(i32 x, v16f32 y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @v16f32_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void (...) @vararg(<16 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void v16f32_i32(v16f32 x, i32 y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @i32_v32f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128
+// CHECK-NEXT: [[Y:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x float> [[Y]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void i32_v32f32(i32 x, v32f32 y)
+{
+ vararg(x, y);
+}
+
+// CHECK-LABEL: @v32f32_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128
+// CHECK-NEXT: [[X:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <32 x float> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT: ret void
+//
+void v32f32_i32(v32f32 x, i32 y)
+{
+ vararg(x, y);
+}
diff --git a/clang/test/CodeGen/variadic-wrapper-removal.c b/clang/test/CodeGen/variadic-wrapper-removal.c
new file mode 100644
index 00000000000000..da41dde16f3d73
--- /dev/null
+++ b/clang/test/CodeGen/variadic-wrapper-removal.c
@@ -0,0 +1,86 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+
+// neither arm arch is implemented yet, leaving it here as a reminder
+// armv6 is a ptr as far as the struct is concerned, but possibly also a [1 x i32] passed by value
+// that seems inconsistent, maybe leave 32 bit arm alone for now
+// aarch64 is a struct of five things passed using byval memcpy
+
+// R-N: %clang_cc1 -triple=armv6-none--eabi -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+
+// R-N: %clang_cc1 -triple=aarch64-none-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+
+
+
+// expand-variadics rewrites calls to variadic functions into calls to
+// equivalent functions that take a va_list argument. A property of the
+// implementation is that said "equivalent function" may be a pre-existing one.
+// This is equivalent to inlining a sufficiently simple variadic wrapper.
+
+#include <stdarg.h>
+
+typedef int FILE; // close enough for this test
+
+// fprintf is sometimes implemented as a call to vfprintf. That fits the
+// pattern the transform pass recognises - given an implementation of fprintf
+// in the IR module, calls to it can be rewritten into calls into vfprintf.
+
+// CHECK-LABEL: define{{.*}} i32 @fprintf(
+// CHECK-LABEL: define{{.*}} i32 @call_fprintf(
+// CHECK-NOT: @fprintf
+// CHECK: @vfprintf
+int vfprintf(FILE *restrict f, const char *restrict fmt, va_list ap);
+int fprintf(FILE *restrict f, const char *restrict fmt, ...)
+{
+ int ret;
+ va_list ap;
+ va_start(ap, fmt);
+ ret = vfprintf(f, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+int call_fprintf(FILE *f)
+{
+ int x = 42;
+ double y = 3.14;
+ return fprintf(f, "int %d dbl %g\n", x, y);
+}
+
+// Void return type is also OK
+
+// CHECK-LABEL: define{{.*}} void @no_result(
+// CHECK-LABEL: define{{.*}} void @call_no_result(
+// CHECK-NOT: @no_result
+// CHECK: @vno_result
+void vno_result(const char * fmt, va_list);
+void no_result(const char * fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vno_result(fmt, ap);
+ va_end(ap);
+}
+void call_no_result(FILE *f)
+{
+ int x = 101;
+ no_result("", x);
+}
+
+// The vaend in the forwarding implementation is optional where it's a no-op
+
+// CHECK-LABEL: define{{.*}} i32 @no_vaend(
+// CHECK-LABEL: define{{.*}} i32 @call_no_vaend(
+// CHECK-NOT: @no_vaend
+// CHECK: @vno_vaend
+int vno_vaend(int x, va_list);
+int no_vaend(int x, ...)
+{
+ va_list ap;
+ va_start(ap, x);
+ return vno_vaend(x, ap);
+}
+int call_no_vaend(int x)
+{
+ return no_vaend(x, 10, 2.5f);
+}
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
new file mode 100644
index 00000000000000..cf436ead77a2cb
--- /dev/null
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X86Linux
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X64SystemV
+
+// RUN: %clang_cc1 -triple i386-apple-darwin -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X86Darwin
+
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X64SystemV
+
+// RUN: %clang_cc1 -triple i686-windows-msvc -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X86Windows
+
+// 64 bit windows va_arg passes most types indirectly but the call instruction uses the types by value
+// ___: %clang_cc1 -triple x86_64-pc-windows-msvc -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK
+
+// Checks for consistency between clang and expand-variadics
+// 1. Use clang to lower va_arg
+// 2. Use expand-variadics to lower the rest of the variadic operations
+// 3. Use opt -O1 to simplify the result for simpler filecheck patterns
+// The simplification will fail when the two are not consistent, modulo bugs elsewhere.
+
+#include <stdarg.h>
+
+// This test can be simplified when expand-variadics is extended to apply to more patterns.
+// The first_valist and second_valist functions can then be inlined, either in the test or
+// by enabling optimisation passes in the clang invocation.
+// The explicit instcombine pass canonicalises the variadic function IR.
+
+// More complicated tests need instcombine of ptrmask to land first.
+
+template <typename X, typename Y>
+static X first_valist(va_list va) {
+ return va_arg(va, X);
+}
+
+template <typename X, typename Y>
+static X first(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ return first_valist<X,Y>(va);
+}
+
+template <typename X, typename Y>
+static Y second_valist(va_list va) {
+ va_arg(va, X);
+ Y r = va_arg(va, Y);
+ return r;
+}
+
+
+template <typename X, typename Y>
+static Y second(...) {
+ va_list va;
+ __builtin_va_start(va, 0);
+ return second_valist<X,Y>(va);
+}
+
+extern "C"
+{
+// CHECK-LABEL: define{{.*}} i32 @first_i32_i32(i32{{.*}} %x, i32{{.*}} %y)
+// CHECK: entry:
+// CHECK: ret i32 %x
+int first_i32_i32(int x, int y)
+{
+ return first<int,int>(x, y);
+}
+
+// CHECK-LABEL: define{{.*}} i32 @second_i32_i32(i32{{.*}} %x, i32{{.*}} %y)
+// CHECK: entry:
+// CHECK: ret i32 %y
+int second_i32_i32(int x, int y)
+{
+ return second<int,int>(x, y);
+}
+}
+
+// Permutations of an int and a double
+extern "C"
+{
+// CHECK-LABEL: define{{.*}} i32 @first_i32_f64(i32{{.*}} %x, double{{.*}} %y)
+// CHECK: entry:
+// CHECK: ret i32 %x
+int first_i32_f64(int x, double y)
+{
+ return first<int,double>(x, y);
+}
+
+// CHECK-LABEL: define{{.*}} double @second_i32_f64(i32{{.*}} %x, double{{.*}} %y)
+// CHECK: entry:
+
+// X86Linux: ret double %y
+// X64SystemV: ret double %y
+// X86Darwin: ret double %y
+// X86Windows: [[TMP0:%.*]] = alloca <{ [4 x i8], double }>, align 4
+// X86Windows: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
+// X86Windows: store double %y, ptr [[TMP1]], align 4
+// X86Windows: [[TMP2:%.*]] = load double, ptr [[TMP0]], align 4
+// X86Windows: ret double [[TMP2]]
+double second_i32_f64(int x, double y)
+{
+ return second<int,double>(x, y);
+}
+
+// CHECK-LABEL: define{{.*}} double @first_f64_i32(double{{.*}} %x, i32{{.*}} %y)
+// CHECK: entry:
+// CHECK: ret double %x
+double first_f64_i32(double x, int y)
+{
+ return first<double,int>(x, y);
+}
+
+// CHECK-LABEL: define{{.*}} i32 @second_f64_i32(double{{.*}} %x, i32{{.*}} %y)
+// CHECK: entry:
+// CHECK: ret i32 %y
+int second_f64_i32(double x, int y)
+{
+ return second<double,int>(x, y);
+}
+}
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index bbfb8a0dbe26a4..fe3208df7a23b2 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -600,6 +600,10 @@ namespace llvm {
/// Lowers KCFI operand bundles for indirect calls.
FunctionPass *createKCFIPass();
+
+ /// Inline variadic functions and expand variadic intrinsics.
+ ModulePass *createExpandVariadicsPass();
+
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 3db639a6872407..6487d0a5e26d1b 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
+void initializeExpandVariadicsPass(PassRegistry &);
void initializeExpandVectorPredicationPass(PassRegistry &);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
new file mode 100644
index 00000000000000..e7ffe343b940e9
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -0,0 +1,17 @@
+#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+/// Module pass wrapper (PassInfoMixin) for the variadic expansion transform.
+/// It is registered under the name "expand-variadics" in PassRegistry.def.
+class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c934ec42f6eb15..624fffd233ce56 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -131,6 +131,7 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 44511800ccff8d..4ea9493208315a 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
MODULE_PASS("dxil-upgrade", DXILUpgradePass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
+MODULE_PASS("expand-variadics", ExpandVariadicsPass())
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globalopt", GlobalOptPass())
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 034f1587ae8df4..b8bd0be91d2232 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
DeadArgumentElimination.cpp
ElimAvailExtern.cpp
EmbedBitcodePass.cpp
+ ExpandVariadics.cpp
ExtractGV.cpp
ForceFunctionAttrs.cpp
FunctionAttrs.cpp
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
new file mode 100644
index 00000000000000..ff22d99333e62a
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -0,0 +1,715 @@
+//===-- ExpandVariadics.cpp ------------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimisation pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The target-dependent parts are in namespace VariadicABIInfo. Enabling a new
+// target means adding a case to VariadicABIInfo::create() along with tests.
+//
+// The module pass using that information is class ExpandVariadics.
+//
+// The strategy is:
+// 1. Test whether a variadic function is sufficiently simple
+// 2. If it was, calls to it can be replaced with calls to a different function
+// 3. If it wasn't, try to split it into a simple function and a remainder
+// 4. Optionally rewrite the variadic function calling convention as well
+//
+// This pass considers "sufficiently simple" to mean a variadic function that
+// calls into a different function taking a va_list to do the real work. For
+// example, libc might implement fprintf as a single basic block calling into
+// vfprintf. This pass can then rewrite calls to the variadic into some code
+// to construct a target-specific value to use for the va_list and a call
+// into the non-variadic implementation function. There's a test for that.
+//
+// Most other variadic functions whose definition is known can be converted into
+// that form. Create a new internal function taking a va_list where the original
+// took a ... parameter. Move the blocks across. Create a new block containing a
+// va_start that calls into the new function. This is nearly target independent.
+//
+// Where this transform is consistent with the ABI, e.g. AMDGPU or NVPTX, or
+// where the ABI can be chosen to align with this transform, the function
+// interface can be rewritten along with calls to unknown variadic functions.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// This pass does define some target specific information which is partially
+// redundant with other parts of the compiler. In particular, the call frame
+// it builds must be the exact complement of the va_arg lowering performed
+// by clang. The va_list construction is similar to work done by the backend
+// for targets that lower variadics there, though distinct in that this pass
+// constructs the pieces using alloca instead of relative to stack pointers.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+namespace {
+namespace VariadicABIInfo {
+
+// How a va_list object is handed to a callee, same as it would be in C.
+// NOTE(review): aarch64 is said to use byval; that variant is not enabled yet.
+enum class valistCC { value, pointer, /*byval*/ };
+
+// Target-specific description of how variadic arguments and the va_list are
+// laid out. Concrete targets derive from this and are handed out by create().
+struct Interface {
+protected:
+  Interface() = default;
+
+public:
+  virtual ~Interface() = default;
+
+  // Type of the va_list object. Most ABIs use a void* or char* for va_list,
+  // others can specialise.
+  virtual Type *vaListType(LLVMContext &Ctx) {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  // How the vaListType is passed
+  virtual valistCC vaListCC() { return valistCC::value; }
+
+  // The valist might need to be stack allocated.
+  virtual bool valistOnStack() { return false; }
+
+  // Populate a stack allocated va_list so that it refers to the given buffer.
+  // Targets returning true from valistOnStack() must override this; the
+  // default is never expected to run. llvm_unreachable is used rather than
+  // __builtin_unreachable so the code also builds with non-GNU host compilers
+  // and reports a message in assertion-enabled builds.
+  virtual void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                                AllocaInst * /*va_list*/, Value * /*buffer*/) {
+    llvm_unreachable(
+        "initializeVAList must be overridden when valistOnStack() is true");
+  }
+
+  // Bounds on the alignment of an argument slot, in bytes.
+  // NOTE(review): the implementations in this file return 0 from
+  // maximum_slot_align() where their comments describe "no maximum" - confirm
+  // against the consumer of these values.
+  virtual uint32_t minimum_slot_align() = 0;
+  virtual uint32_t maximum_slot_align() = 0;
+
+  // Could make these virtual, fair chance that's free since all
+  // classes choose not to override them at present
+
+  // All targets currently implemented use a ptr for the valist parameter
+  Type *vaListParameterType(LLVMContext &Ctx) {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  bool VAEndIsNop() { return true; }
+
+  bool VACopyIsMemcpy() { return true; }
+};
+
+// va_list handling for the x86-64 System V style ABI: the va_list is a stack
+// allocated one-element array of {i32, i32, ptr, ptr} passed by pointer.
+struct X64SystemV final : public Interface {
+  Type *vaListType(LLVMContext &Ctx) override {
+    Type *Int32Ty = Type::getInt32Ty(Ctx);
+    Type *PtrTy = PointerType::getUnqual(Ctx);
+    StructType *Record =
+        StructType::get(Ctx, {Int32Ty, Int32Ty, PtrTy, PtrTy});
+    return ArrayType::get(Record, 1);
+  }
+
+  valistCC vaListCC() override { return valistCC::pointer; }
+
+  bool valistOnStack() override { return true; }
+
+  // Stores the four fields of the record held in va_list. voidBuffer becomes
+  // the overflow area pointer and the register save area is set to null.
+  // NOTE(review): the offsets 48 and 6*8 + 8*16 presumably mark every
+  // register slot as already consumed so va_arg always reads from the
+  // overflow area - confirm against the psABI.
+  void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
+                        AllocaInst *va_list, Value *voidBuffer) override {
+    assert(valistOnStack());
+    assert(va_list != nullptr);
+    assert(va_list->getAllocatedType() == vaListType(Ctx));
+
+    Type *ListTy = vaListType(Ctx);
+    Type *Int32Ty = Type::getInt32Ty(Ctx);
+    Type *Int64Ty = Type::getInt64Ty(Ctx);
+
+    // Address of field number Field inside element zero of the array.
+    auto FieldAddr = [&](unsigned Field, const char *Name) -> Value * {
+      Value *Path[3] = {
+          ConstantInt::get(Int64Ty, 0),
+          ConstantInt::get(Int32Ty, 0),
+          ConstantInt::get(Int32Ty, Field),
+      };
+      return Builder.CreateInBoundsGEP(ListTy, va_list, Path, Name);
+    };
+
+    Value *GpOffset = FieldAddr(0, "gp_offset");
+    Builder.CreateStore(ConstantInt::get(Int32Ty, 48), GpOffset);
+
+    Value *FpOffset = FieldAddr(1, "fp_offset");
+    Builder.CreateStore(ConstantInt::get(Int32Ty, 6 * 8 + 8 * 16), FpOffset);
+
+    // NOTE(review): the value name below is spelled "overfow"; kept as is.
+    Value *OverflowArea = FieldAddr(2, "overfow_arg_area");
+    Builder.CreateStore(voidBuffer, OverflowArea);
+
+    Value *RegSaveArea = FieldAddr(3, "reg_save_area");
+    Builder.CreateStore(ConstantPointerNull::get(PointerType::getUnqual(Ctx)),
+                        RegSaveArea);
+  }
+
+  // X64 documented behaviour:
+  // Slots are at least eight byte aligned and at most 16 byte aligned.
+  // If the type needs more than sixteen byte alignment, it still only gets
+  // that much alignment on the stack.
+  // X64 behaviour in clang:
+  // Slots are at least eight byte aligned and at most naturally aligned
+  // This matches clang, not the ABI docs.
+  uint32_t minimum_slot_align() override { return 8; }
+  uint32_t maximum_slot_align() override { return 0; }
+};
+
+// Select the variadic ABI description for the module's target triple.
+// Returns nullptr when the target is not supported, in which case the pass
+// leaves the module alone.
+std::unique_ptr<Interface> create(Module &M) {
+  llvm::Triple Triple(M.getTargetTriple());
+  const bool IsLinuxABI = Triple.isOSLinux() || Triple.isOSCygMing();
+
+  switch (Triple.getArch()) {
+
+  case Triple::x86: {
+    // Darwin, Windows and Linux all fall out the same, despite
+    // X86_32ABIInfo::getTypeStackAlignInBytes implying otherwise.
+    // The slotSize(4) implies a minimum alignment.
+    // The AllowHigherAlign = true means there is no maximum alignment.
+    struct X86StackSlots final : public Interface {
+      uint32_t minimum_slot_align() override { return 4; }
+      uint32_t maximum_slot_align() override { return 0; }
+    };
+
+    if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::Win32 ||
+        IsLinuxABI)
+      return std::make_unique<X86StackSlots>();
+    break;
+  }
+
+  case Triple::x86_64: {
+    // Every MSVC-environment triple is also a Windows triple, so the single
+    // OS test is sufficient.
+    if (Triple.isOSWindows()) {
+      // x64 msvc emit vaarg passes > 8 byte values by pointer
+      // however the variadic call instruction created does not, e.g.
+      // a <4 x f32> will be passed as itself, not as a pointer or byval.
+      // Postponing resolution of that for now. When implemented, the slots
+      // are expected to use a minimum and maximum alignment of 8.
+      return nullptr;
+    }
+
+    if (Triple.isOSDarwin() || IsLinuxABI)
+      return std::make_unique<VariadicABIInfo::X64SystemV>();
+    break;
+  }
+
+  default:
+    break;
+  }
+
+  // Unsupported architecture or operating system.
+  return nullptr;
+}
+
+} // namespace VariadicABIInfo
+
+// Legacy module pass that redirects calls to simple variadic functions to the
+// va_list taking function they forward to. The target specific pieces come
+// from the VariadicABIInfo::Interface chosen in runOnModule.
+class ExpandVariadics : public ModulePass {
+public:
+  static char ID;
+  std::unique_ptr<VariadicABIInfo::Interface> ABI;
+
+  ExpandVariadics() : ModulePass(ID) {}
+  StringRef getPassName() const override { return "Expand variadic functions"; }
+
+  // Acts as a predicate where a nullptr result means "no".
+  // On success returns the function to target when inlining.
+  Function *isFunctionInlinable(Module &M, Function *F);
+
+  // Rewrite a call site.
+  void ExpandCall(Module &M, CallInst *CB, Function *VarargF, Function *NF);
+
+  // Whether F is a variadic function this pass is allowed to look at.
+  // This could be partially target specific.
+  bool expansionApplicableToFunction(Module &M, Function *F) {
+    if (F->isIntrinsic() || !F->isVarArg() ||
+        F->hasFnAttribute(Attribute::Naked))
+      return false;
+
+    if (F->getCallingConv() != CallingConv::C)
+      return false;
+
+    if (GlobalValue::isInterposableLinkage(F->getLinkage()))
+      return false;
+
+    // Every use must be a direct, non-musttail call whose function type
+    // matches the type of F.
+    for (const Use &U : F->uses()) {
+      const auto *CB = dyn_cast<CallBase>(U.getUser());
+      if (!CB || CB->isMustTailCall())
+        return false;
+
+      if (!CB->isCallee(&U) || CB->getFunctionType() != F->getFunctionType())
+        return false;
+    }
+
+    // Branch funnels look like variadic functions but aren't:
+    //
+    // define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) {
+    //  musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr @vt1_1,
+    //  ptr @vf1_1, ...) ret void
+    // }
+    //
+    // %1 = call i32 @__typeid_typeid1_0_branch_funnel(ptr nest %vtable, ptr
+    // %obj, i32 1)
+    //
+    // Look the intrinsic up by name instead of requesting a declaration, so
+    // that this query never adds to or removes from the module's function
+    // list. No declaration means no function can contain a funnel call.
+    const Function *Funnel =
+        M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
+    if (Funnel) {
+      for (const User *U : Funnel->users()) {
+        const auto *Call = dyn_cast<CallBase>(U);
+        if (Call && Call->getFunction() == F)
+          return false;
+      }
+    }
+
+    return true;
+  }
+
+  // Steps past the instruction at Iter when it is a call to intrinsic ID,
+  // otherwise returns Iter unchanged. Iter must be dereferenceable.
+  template <Intrinsic::ID ID>
+  static BasicBlock::iterator
+  skipIfInstructionIsSpecificIntrinsic(BasicBlock::iterator Iter) {
+    if (auto *Intrinsic = dyn_cast<IntrinsicInst>(&*Iter))
+      if (Intrinsic->getIntrinsicID() == ID)
+        ++Iter;
+    return Iter;
+  }
+
+  // Only musttail call instructions are rejected. NF is currently unused.
+  bool callinstRewritable(CallBase *CB, Function *NF) {
+    if (auto *CI = dyn_cast<CallInst>(CB))
+      if (CI->isMustTailCall())
+        return false;
+
+    return true;
+  }
+
+  // Redirect every direct call of F to its va_list taking equivalent.
+  // Returns true if at least one call site was rewritten.
+  bool runOnFunction(Module &M, Function *F) {
+    if (!expansionApplicableToFunction(M, F))
+      return false;
+
+    Function *Equivalent = isFunctionInlinable(M, F);
+    if (!Equivalent)
+      return false;
+
+    bool Changed = false;
+    // ExpandCall is handed the call instruction to rewrite, so advance the
+    // iterator before each call site is processed.
+    for (User *U : llvm::make_early_inc_range(F->users())) {
+      auto *CI = dyn_cast<CallInst>(U);
+      if (!CI || CI->getCalledOperand() != F)
+        continue;
+
+      ExpandCall(M, CI, F, Equivalent);
+      Changed = true;
+    }
+
+    return Changed;
+  }
+
+  // Picks the ABI for the module's target and visits every function.
+  // Does nothing when the target is unsupported.
+  bool runOnModule(Module &M) override {
+    ABI = VariadicABIInfo::create(M);
+    if (!ABI)
+      return false;
+
+    bool Changed = false;
+    for (Function &F : llvm::make_early_inc_range(M))
+      Changed |= runOnFunction(M, &F);
+
+    return Changed;
+  }
+};
+
+// Pattern-matches the body of the variadic function F against
+//
+// alloca va_list; [lifetime.start]; va_start; [load va_list]; call inner;
+// [va_end]; [lifetime.end]; ret
+//
+// where the inner call receives F's fixed arguments in order followed by the
+// va_list. The load is required when the ABI passes va_list by value, and
+// va_end may only be absent when the ABI reports it as a no-op.
+// Returns the function called by the inner call on success, nullptr
+// otherwise (including when the inner call is not a direct call).
+Function *ExpandVariadics::isFunctionInlinable(Module &M, Function *F) {
+ assert(F->isVarArg());
+ assert(expansionApplicableToFunction(M, F));
+
+ if (F->isDeclaration())
+ return nullptr;
+
+ // A variadic function is inlinable if it is sufficiently simple.
+ // Specifically, if it is a single basic block which is functionally
+ // equivalent to packing the variadic arguments into a va_list which is
+ // passed to another function. The inlining strategy is to build a va_list
+ // in the caller and then call said inner function.
+
+ // The entry block must end in a return, so everything the function does
+ // happens in this one block.
+ BasicBlock &BB = F->getEntryBlock();
+ if (!isa<ReturnInst>(BB.getTerminator())) {
+ return nullptr;
+ }
+
+ // Walk the block in order checking for specific instructions, some of them
+ // optional. The block is known to end in a return, and every early exit
+ // below fires before the iterator can move past that terminator.
+ BasicBlock::iterator it = BB.begin();
+
+ AllocaInst *alloca = dyn_cast<AllocaInst>(&*it++);
+ if (!alloca)
+ return nullptr;
+
+ Value *valist_argument = alloca;
+
+ it = skipIfInstructionIsSpecificIntrinsic<Intrinsic::lifetime_start>(it);
+
+ VAStartInst *start = dyn_cast<VAStartInst>(&*it++);
+ if (!start || start->getArgList() != valist_argument) {
+ return nullptr;
+ }
+
+ // The va_list instance is stack allocated
+ // The ... replacement is a va_list passed "by value"
+ // That involves a load for some ABIs and passing the pointer for others
+ Value *valist_trailing_argument = nullptr;
+ switch (ABI->vaListCC()) {
+ case VariadicABIInfo::valistCC::value: {
+ // If it's being passed by value, need a load, and it must read the
+ // va_list that va_start just initialised. Forwarding some unrelated
+ // loaded value is not the pattern being matched.
+ auto *load = dyn_cast<LoadInst>(&*it);
+ if (!load || load->getPointerOperand() != valist_argument)
+ return nullptr;
+ valist_trailing_argument = load;
+ it++;
+ break;
+ }
+ case VariadicABIInfo::valistCC::pointer: {
+ // If it's being passed by pointer, going to use the alloca directly
+ valist_trailing_argument = valist_argument;
+ break;
+ }
+ }
+
+ CallInst *call = dyn_cast<CallInst>(&*it++);
+ if (!call)
+ return nullptr;
+
+ if (auto *end = dyn_cast<VAEndInst>(&*it)) {
+ if (end->getArgList() != valist_argument)
+ return nullptr;
+ it++;
+ } else {
+ // Only fail on a missing va_end if it wasn't a no-op
+ if (!ABI->VAEndIsNop())
+ return nullptr;
+ }
+
+ it = skipIfInstructionIsSpecificIntrinsic<Intrinsic::lifetime_end>(it);
+
+ // The return must be the last instruction of the block.
+ ReturnInst *ret = dyn_cast<ReturnInst>(&*it++);
+ if (!ret || it != BB.end())
+ return nullptr;
+
+ // The function call is expected to take the fixed arguments then the alloca
+ // TODO: Drop the vectors here, iterate over them both together instead.
+ SmallVector<Value *> FuncArgs;
+ for (Argument &A : F->args())
+ FuncArgs.push_back(&A);
+
+ SmallVector<Value *> CallArgs;
+ for (Use &A : call->args())
+ CallArgs.push_back(A);
+
+ size_t Fixed = FuncArgs.size();
+ if (Fixed + 1 != CallArgs.size())
+ return nullptr;
+
+ for (size_t i = 0; i < Fixed; i++)
+ if (FuncArgs[i] != CallArgs[i])
+ return nullptr;
+
+ if (CallArgs[Fixed] != valist_trailing_argument)
+ return nullptr;
+
+ // Check the variadic function returns the result of the inner call
+ Value *maybeReturnValue = ret->getReturnValue();
+ if (call->getType()->isVoidTy()) {
+ if (maybeReturnValue != nullptr)
+ return nullptr;
+ } else {
+ if (maybeReturnValue != call)
+ return nullptr;
+ }
+
+ // All checks passed. Found a va_list taking function we can use.
+ return call->getCalledFunction();
+}
+
+// Replaces the call CB to the variadic function VarargF with a call to NF.
+// The variadic arguments of CB are copied into a freshly allocated packed
+// struct in the caller's entry block, and a va_list referring to that struct
+// is passed as the final argument of the new call. CB is erased on success.
+// The call is left untouched if it is musttail or if its function type does
+// not match VarargF's.
+void ExpandVariadics::ExpandCall(Module &M, CallInst *CB, Function *VarargF,
+ Function *NF) {
+ const DataLayout &DL = M.getDataLayout();
+
+ if (!callinstRewritable(CB, NF)) {
+ return;
+ }
+
+ // This is something of a problem because the call instructions' idea of the
+ // function type doesn't necessarily match reality, before or after this
+ // pass
+ // Since the plan here is to build a new instruction there is no
+ // particular benefit to trying to preserve an incorrect initial type
+ // If the types don't match and we aren't changing ABI, leave it alone
+ // in case someone is deliberately doing dubious type punning through a
+ // varargs
+ FunctionType *FuncType = CB->getFunctionType();
+ if (FuncType != VarargF->getFunctionType()) {
+ return;
+ }
+
+ auto &Ctx = CB->getContext();
+
+ // Bounds applied to the alignment of every slot; 0 means "no bound".
+ struct slotAlignTy {
+ uint32_t min;
+ uint32_t max;
+ };
+
+ slotAlignTy slotAlign;
+ slotAlign.min = ABI->minimum_slot_align();
+ slotAlign.max = ABI->maximum_slot_align();
+
+ // Align the struct on slotAlign.min to start with
+ Align MaxFieldAlign(slotAlign.min ? slotAlign.min : 1);
+
+ // The strategy here is to allocate a call frame containing the variadic
+ // arguments laid out such that a target specific va_list can be initialised
+ // with it, such that target specific va_arg instructions will correctly
+ // iterate over it. Primarily this means getting the alignment right.
+
+ class {
+ // The awkward memory layout is to allow access to a contiguous array of
+ // types
+ enum { N = 4 };
+ SmallVector<Type *, N> fieldTypes;
+ // Parallel to fieldTypes. A null value marks a field that is never
+ // written (padding or the placeholder for an empty frame).
+ SmallVector<std::pair<Value *, bool>, N> maybeValueIsByval;
+
+ public:
+ void append(Type *t, Value *v, bool isByval) {
+ fieldTypes.push_back(t);
+ maybeValueIsByval.push_back({v, isByval});
+ }
+
+ void padding(LLVMContext &Ctx, uint64_t by) {
+ append(ArrayType::get(Type::getInt8Ty(Ctx), by), nullptr, false);
+ }
+
+ size_t size() { return fieldTypes.size(); }
+ bool empty() { return fieldTypes.empty(); }
+
+ // The struct is packed: all padding is explicit, inserted by the caller.
+ StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
+ const bool isPacked = true;
+ return StructType::create(Ctx, fieldTypes,
+ (Twine(Name) + ".vararg").str(), isPacked);
+ }
+
+ // Emits the stores / memcpys that copy each recorded value into its
+ // field of alloced, which must have been created from asStruct().
+ void initialiseStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
+ AllocaInst *alloced) {
+
+ StructType *VarargsTy = cast<StructType>(alloced->getAllocatedType());
+ const StructLayout *Layout = DL.getStructLayout(VarargsTy);
+
+ for (size_t i = 0; i < size(); i++) {
+ auto [v, isByval] = maybeValueIsByval[i];
+ if (!v)
+ continue;
+
+ auto r = Builder.CreateStructGEP(VarargsTy, alloced, i);
+ if (isByval) {
+ Type *ByValType = fieldTypes[i];
+ Builder.CreateMemCpy(r, {}, v, {},
+ DL.getTypeAllocSize(ByValType).getFixedValue());
+ } else {
+ // A plain CreateStore would assume the ABI alignment of the value
+ // type. The slot lives in a packed struct whose field alignment may
+ // have been clamped by the ABI's maximum slot alignment, so only
+ // claim what the alloca alignment and field offset guarantee.
+ uint64_t Offset = Layout->getElementOffset(i);
+ Align SlotAlign = commonAlignment(alloced->getAlign(), Offset);
+ Builder.CreateAlignedStore(
+ v, r, std::min(SlotAlign, DL.getABITypeAlign(v->getType())));
+ }
+ }
+ }
+ } Frame;
+
+ // Lay out the variadic arguments (those past the fixed parameters).
+ uint64_t CurrentOffset = 0;
+ for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
+ Value *ArgVal = CB->getArgOperand(I);
+ bool isByVal = CB->paramHasAttr(I, Attribute::ByVal);
+ Type *ArgType = isByVal ? CB->getParamByValType(I) : ArgVal->getType();
+ Align DataAlign = DL.getABITypeAlign(ArgType);
+
+ uint64_t DataAlignV = DataAlign.value();
+
+ // Currently using 0 as a sentinel to mean ignored
+ if (slotAlign.min && DataAlignV < slotAlign.min)
+ DataAlignV = slotAlign.min;
+ if (slotAlign.max && DataAlignV > slotAlign.max)
+ DataAlignV = slotAlign.max;
+
+ DataAlign = Align(DataAlignV);
+ MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
+
+ if (uint64_t Rem = CurrentOffset % DataAlignV) {
+ // Inject explicit padding to deal with alignment requirements
+ uint64_t Padding = DataAlignV - Rem;
+ Frame.padding(Ctx, Padding);
+ CurrentOffset += Padding;
+ }
+
+ Frame.append(ArgType, ArgVal, isByVal);
+ CurrentOffset += DL.getTypeAllocSize(ArgType).getFixedValue();
+ }
+
+ if (Frame.empty()) {
+ // Not passing anything, hopefully va_arg won't try to dereference it
+ // Might be a target specific thing whether one can pass nullptr instead
+ // of undef i32
+ Frame.append(Type::getInt32Ty(Ctx), nullptr, false);
+ }
+
+ Function *CBF = CB->getParent()->getParent();
+
+ // The struct type is named after the calling function.
+ StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
+
+ // The buffer is allocated in the caller's entry block.
+ BasicBlock &BB = CBF->getEntryBlock();
+ IRBuilder<> Builder(&*BB.getFirstInsertionPt());
+
+ // Clumsy call here is to set a specific alignment on the struct instance
+ AllocaInst *alloced =
+ Builder.Insert(new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr,
+ MaxFieldAlign),
+ "vararg_buffer");
+ assert(alloced->getAllocatedType() == VarargsTy);
+
+ // Initialise the fields in the struct
+ // TODO: Lifetime annotate it and alloca in entry
+ // Needs to start life shortly before these copies and end immediately after
+ // the new call instruction
+ Builder.SetInsertPoint(CB);
+
+ Frame.initialiseStructAlloca(DL, Builder, alloced);
+
+ unsigned NumArgs = FuncType->getNumParams();
+
+ // Start from the fixed arguments of the original call.
+ SmallVector<Value *> Args;
+ Args.assign(CB->arg_begin(), CB->arg_begin() + NumArgs);
+
+ // Initialise a va_list pointing to that struct and pass it as the last
+ // argument
+ {
+ PointerType *voidptr = PointerType::getUnqual(Ctx);
+ Value *voidBuffer =
+ Builder.CreatePointerBitCastOrAddrSpaceCast(alloced, voidptr);
+
+ if (ABI->valistOnStack()) {
+ assert(ABI->vaListCC() == VariadicABIInfo::valistCC::pointer);
+ Type *va_list_ty = ABI->vaListType(Ctx);
+
+ // TODO: one va_list alloca per function, also lifetime annotate
+ AllocaInst *va_list =
+ Builder.CreateAlloca(va_list_ty, nullptr, "va_list");
+
+ ABI->initializeVAList(Ctx, Builder, va_list, voidBuffer);
+ Args.push_back(va_list);
+ } else {
+ assert(ABI->vaListCC() == VariadicABIInfo::valistCC::value);
+ Args.push_back(voidBuffer);
+ }
+ }
+
+ // Attributes excluding any on the vararg arguments
+ AttributeList PAL = CB->getAttributes();
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL =
+ AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
+ }
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CB->getOperandBundlesAsDefs(OpBundles);
+
+ CallInst *NewCB = CallInst::Create(NF, Args, OpBundles, "", CB);
+
+ CallInst::TailCallKind TCK = CB->getTailCallKind();
+ assert(TCK != CallInst::TCK_MustTail); // guarded at prologue
+
+ // It doesn't get to be a tail call any more
+ // might want to guard this with arch, x64 and aarch64 document that
+ // varargs can't be tail called anyway
+ // Not totally convinced this is necessary but dead store elimination
+ // decides to discard the stores to the alloca and pass uninitialised
+ // memory along instead when the function is marked tailcall
+ if (TCK == CallInst::TCK_Tail) {
+ TCK = CallInst::TCK_None;
+ }
+ NewCB->setTailCallKind(TCK);
+
+ NewCB->setAttributes(PAL);
+ NewCB->takeName(CB);
+ NewCB->setCallingConv(CB->getCallingConv());
+ NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+
+ if (!CB->use_empty()) // dead branch?
+ {
+ CB->replaceAllUsesWith(NewCB);
+ }
+ CB->eraseFromParent();
+}
+
+} // namespace
+
+char ExpandVariadics::ID = 0; // Definition of the static member that the constructor passes to ModulePass(ID).
+
+INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
+ false)
+
+// Factory returning a newly allocated ExpandVariadics pass instance.
+ModulePass *llvm::createExpandVariadicsPass() {
+ ModulePass *Pass = new ExpandVariadics();
+ return Pass;
+}
+
+// New pass manager entry point: runs the module pass implementation on M and
+// reports all analyses preserved only when nothing was changed.
+PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
+ ExpandVariadics Impl;
+ if (Impl.runOnModule(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll b/llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll
new file mode 100644
index 00000000000000..a5b783e5966142
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.4.0"
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define void @codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %x.addr = alloca ptr, align 4
+; CHECK-NEXT: %cp = alloca ptr, align 4
+; CHECK-NEXT: store ptr %x, ptr %x.addr, align 4, !tbaa !4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #9
+; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr)
+; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4, !tbaa !4
+; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10
+; CHECK-NEXT: call void @llvm.va_end(ptr %cp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #9
+; CHECK-NEXT: ret void
+;
+entry:
+ %x.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %x, ptr %x.addr, align 4, !tbaa !5
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #8
+ call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr)
+ %0 = load ptr, ptr %cp, align 4, !tbaa !5
+ call void @wrapped(ptr noundef %0) #9
+ call void @llvm.va_end(ptr %cp)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #8
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+declare void @llvm.va_copy(ptr, ptr) #2
+
+declare void @wrapped(ptr noundef) local_unnamed_addr #3
+
+declare void @llvm.va_end(ptr) #2
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+define void @vararg(...) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@vararg(...) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #9
+; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va)
+; CHECK-NEXT: %0 = load ptr, ptr %va, align 4, !tbaa !4
+; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10
+; CHECK-NEXT: call void @llvm.va_end(ptr %va)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #9
+; CHECK-NEXT: ret void
+;
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #8
+ call void @llvm.va_start(ptr nonnull %va)
+ %0 = load ptr, ptr %va, align 4, !tbaa !5
+ call void @wrapped(ptr noundef %0) #9
+ call void @llvm.va_end(ptr %va)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #8
+ ret void
+}
+
+declare void @llvm.va_start(ptr) #2
+
+define void @single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x) #10
+ ret void
+}
+
+define void @single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x) #10
+ ret void
+}
+
+define void @single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x) #10
+ ret void
+}
+
+define void @single_v8f32(<8 x float> noundef %x) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x) #10
+ ret void
+}
+
+define void @single_v16f32(<16 x float> noundef %x) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x) #10
+ ret void
+}
+
+define void @single_v32f32(<32 x float> noundef %x) local_unnamed_addr #7 {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) local_unnamed_addr #7 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x) #10
+ ret void
+}
+
+define void @i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y) #10
+ ret void
+}
+
+define void @double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 24, i1 false)
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 4 %y) #10
+ ret void
+}
+
+define void @libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 4 %x, i32 noundef %y) #10
+ ret void
+}
+
+define void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) #10
+ ret void
+}
+
+define void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y) #10
+ ret void
+}
+
+define void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y) #10
+ ret void
+}
+
+define void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) local_unnamed_addr #7 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) local_unnamed_addr #7 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y) #10
+ ret void
+}
+
+define void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) local_unnamed_addr #7 {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) local_unnamed_addr #7 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" }
+attributes #5 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="256" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" }
+attributes #6 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" }
+attributes #7 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="1024" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" }
+attributes #8 = { nounwind }
+attributes #9 = { nobuiltin nounwind "no-builtins" }
+attributes #10 = { nobuiltin "no-builtins" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"NumRegisterParameters", i32 0}
+!1 = !{i32 1, !"wchar_size", i32 4}
+!2 = !{i32 8, !"PIC Level", i32 2}
+!3 = !{i32 7, !"frame-pointer", i32 2}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll b/llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll
new file mode 100644
index 00000000000000..f932065df15724
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+; The %<caller>.vararg types below show the layout of the call frame (vararg buffer) that each caller builds
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define dso_local void @codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %x.addr = alloca ptr, align 4
+; CHECK-NEXT: %cp = alloca ptr, align 4
+; CHECK-NEXT: store ptr %x, ptr %x.addr, align 4, !tbaa !4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #9
+; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr)
+; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4, !tbaa !4
+; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10
+; CHECK-NEXT: call void @llvm.va_end(ptr %cp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #9
+; CHECK-NEXT: ret void
+; Contains no variadic call: the va_copy/va_end sequence is expected to come through unchanged apart from attribute-group and metadata renumbering.
+entry:
+ %x.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %x, ptr %x.addr, align 4, !tbaa !5
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #8
+ call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr)
+ %0 = load ptr, ptr %cp, align 4, !tbaa !5
+ call void @wrapped(ptr noundef %0) #9
+ call void @llvm.va_end(ptr %cp)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #8
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+declare void @llvm.va_copy(ptr, ptr) #2
+
+declare void @wrapped(ptr noundef) local_unnamed_addr #3
+
+declare void @llvm.va_end(ptr) #2
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+define dso_local void @vararg(...) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@vararg(...) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #9
+; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va)
+; CHECK-NEXT: %0 = load ptr, ptr %va, align 4, !tbaa !4
+; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10
+; CHECK-NEXT: call void @llvm.va_end(ptr %va)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #9
+; CHECK-NEXT: ret void
+; The variadic callee itself is expected to keep its va_start/va_end body; only the attribute-group and metadata numbers differ from the input.
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #8
+ call void @llvm.va_start(ptr nonnull %va)
+ %0 = load ptr, ptr %va, align 4, !tbaa !5
+ call void @wrapped(ptr noundef %0) #9
+ call void @llvm.va_end(ptr %va)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #8
+ ret void
+}
+
+declare void @llvm.va_start(ptr) #2
+
+define dso_local void @single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x is stored into a %single_i32.vararg buffer (align 4) and @wrapped is called on that buffer in place of the @vararg call.
+entry:
+ tail call void (...) @vararg(i32 noundef %x) #10
+ ret void
+}
+
+define dso_local void @single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: the double is stored into a %single_double.vararg buffer with only 4-byte alignment on this target, and the buffer is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(double noundef %x) #10
+ ret void
+}
+
+define dso_local void @single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: the <4 x float> is stored into a 16-byte-aligned %single_v4f32.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x) #10
+ ret void
+}
+
+define dso_local void @single_v8f32(<8 x float> noundef %x) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: the <8 x float> is stored into a 32-byte-aligned %single_v8f32.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x) #10
+ ret void
+}
+
+define dso_local void @single_v16f32(<16 x float> noundef %x) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: the <16 x float> is stored into a 64-byte-aligned %single_v16f32.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x) #10
+ ret void
+}
+
+define dso_local void @single_v32f32(<32 x float> noundef %x) local_unnamed_addr #7 {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) local_unnamed_addr #7 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: the <32 x float> is stored by value into a 128-byte-aligned %single_v32f32.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x) #10
+ ret void
+}
+
+define dso_local void @i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store double %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x and %y go into fields 0 and 1 of an %i32_double.vararg buffer (no padding field, both stores align 4), which is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y) #10
+ ret void
+}
+
+define dso_local void @double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x and %y go into fields 0 and 1 of a %double_i32.vararg buffer (align 4), which is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define dso_local void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 24, i1 false)
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x is stored to field 0 and the byval %struct.libcS is copied into field 1 with a 24-byte memcpy before the buffer is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 4 %y) #10
+ ret void
+}
+
+define dso_local void @libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 24, i1 false)
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: the byval %struct.libcS is copied into field 0 with a 24-byte memcpy and %y is stored to field 1 before the buffer is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 4 %x, i32 noundef %y) #10
+ ret void
+}
+
+define dso_local void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x goes to field 0 and %y to field 2, skipping the [12 x i8] padding field that places the vector at a 16-byte offset.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) #10
+ ret void
+}
+
+define dso_local void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: with the vector first no padding field is needed; %x and %y go to fields 0 and 1 of a 16-byte-aligned buffer passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define dso_local void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x goes to field 0 and %y to field 2, skipping the [28 x i8] padding field that places the vector at a 32-byte offset.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y) #10
+ ret void
+}
+
+define dso_local void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: with the vector first no padding field is needed; %x and %y go to fields 0 and 1 of a 32-byte-aligned buffer passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define dso_local void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x goes to field 0 and %y to field 2, skipping the [60 x i8] padding field that places the vector at a 64-byte offset.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y) #10
+ ret void
+}
+
+define dso_local void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: with the vector first no padding field is needed; %x and %y go to fields 0 and 1 of a 64-byte-aligned buffer passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+define dso_local void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) local_unnamed_addr #7 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) local_unnamed_addr #7 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: %x goes to field 0 and %y to field 2, skipping the [124 x i8] padding field that places the vector at a 128-byte offset.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y) #10
+ ret void
+}
+
+define dso_local void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) local_unnamed_addr #7 {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) local_unnamed_addr #7 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11
+; CHECK-NEXT: ret void
+; Expected rewrite: with the vector first no padding field is needed; %x and %y go to fields 0 and 1 of a 128-byte-aligned buffer passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y) #10
+ ret void
+}
+
+attributes #0 = { nounwind "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #5 = { nounwind "min-legal-vector-width"="256" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #6 = { nounwind "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #7 = { nounwind "min-legal-vector-width"="1024" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #8 = { nounwind }
+attributes #9 = { nobuiltin nounwind "no-builtins" }
+attributes #10 = { nobuiltin "no-builtins" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"NumRegisterParameters", i32 0}
+!1 = !{i32 1, !"wchar_size", i32 4}
+!2 = !{i32 8, !"PIC Level", i32 2}
+!3 = !{i32 7, !"PIE Level", i32 2}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"any pointer", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll b/llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll
new file mode 100644
index 00000000000000..3b270810888abe
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll
@@ -0,0 +1,402 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32"
+target triple = "i686-unknown-windows-msvc19.33.0"
+
+; The %<caller>.vararg types below show the layout of the call frame (vararg buffer) that each caller builds
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ ptr }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, ptr }>
+; CHECK: %v32f32_i32.vararg = type <{ ptr, i32 }>
+
+%struct.libcS = type { i8, i16, i32, i32, float, double }
+
+define dso_local void @codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %x.addr = alloca ptr, align 4
+; CHECK-NEXT: %cp = alloca ptr, align 4
+; CHECK-NEXT: store ptr %x, ptr %x.addr, align 4, !tbaa !3
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #8
+; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr)
+; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4, !tbaa !3
+; CHECK-NEXT: call void @wrapped(ptr noundef %0) #9
+; CHECK-NEXT: call void @llvm.va_end(ptr %cp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #8
+; CHECK-NEXT: ret void
+; Contains no variadic call: the va_copy/va_end sequence is expected to come through unchanged apart from attribute-group and metadata renumbering.
+entry:
+ %x.addr = alloca ptr, align 4
+ %cp = alloca ptr, align 4
+ store ptr %x, ptr %x.addr, align 4, !tbaa !4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #7
+ call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr)
+ %0 = load ptr, ptr %cp, align 4, !tbaa !4
+ call void @wrapped(ptr noundef %0) #8
+ call void @llvm.va_end(ptr %cp)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #7
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+declare void @llvm.va_copy(ptr, ptr) #2
+
+declare dso_local void @wrapped(ptr noundef) local_unnamed_addr #3
+
+declare void @llvm.va_end(ptr) #2
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+define dso_local void @vararg(...) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@vararg(...) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca ptr, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #8
+; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va)
+; CHECK-NEXT: %0 = load ptr, ptr %va, align 4, !tbaa !3
+; CHECK-NEXT: call void @wrapped(ptr noundef %0) #9
+; CHECK-NEXT: call void @llvm.va_end(ptr %va)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #8
+; CHECK-NEXT: ret void
+; The variadic callee itself is expected to keep its va_start/va_end body; only the attribute-group and metadata numbers differ from the input.
+entry:
+ %va = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #7
+ call void @llvm.va_start(ptr nonnull %va)
+ %0 = load ptr, ptr %va, align 4, !tbaa !4
+ call void @wrapped(ptr noundef %0) #8
+ call void @llvm.va_end(ptr %va)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #7
+ ret void
+}
+
+declare void @llvm.va_start(ptr) #2
+
+define dso_local void @single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: %x is stored into a %single_i32.vararg buffer (align 4) and @wrapped is called on that buffer in place of the @vararg call.
+entry:
+ tail call void (...) @vararg(i32 noundef %x) #9
+ ret void
+}
+
+define dso_local void @single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: the double is stored into an 8-byte-aligned %single_double.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(double noundef %x) #9
+ ret void
+}
+
+define dso_local void @single_v4f32(<4 x float> inreg noundef %x) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> inreg noundef %x) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: the inreg <4 x float> is stored into a 16-byte-aligned %single_v4f32.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<4 x float> inreg noundef %x) #9
+ ret void
+}
+
+define dso_local void @single_v8f32(<8 x float> inreg noundef %x) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> inreg noundef %x) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: the inreg <8 x float> is stored into a 32-byte-aligned %single_v8f32.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<8 x float> inreg noundef %x) #9
+ ret void
+}
+
+define dso_local void @single_v16f32(<16 x float> inreg noundef %x) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> inreg noundef %x) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: the inreg <16 x float> is stored into a 64-byte-aligned %single_v16f32.vararg buffer that is passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<16 x float> inreg noundef %x) #9
+ ret void
+}
+
+define dso_local void @single_v32f32(ptr nocapture noundef readonly %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(ptr nocapture noundef readonly %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 4
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128
+; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !7
+; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !7
+; CHECK-NEXT: %1 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %indirect-arg-temp, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; The <32 x float> is passed indirectly here: the expected rewrite stores the address of the %indirect-arg-temp copy into a pointer-only buffer (align 4) and passes that buffer to @wrapped.
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 128
+ %x = load <32 x float>, ptr %0, align 128, !tbaa !8
+ store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !8
+ call void (...) @vararg(ptr noundef nonnull %indirect-arg-temp) #9
+ ret void
+}
+
+define dso_local void @i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: %x goes to field 0 and %y to field 2, skipping the [4 x i8] padding field that places the double at an 8-byte offset.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y) #9
+ ret void
+}
+
+define dso_local void @double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: with the double first no padding field is needed; %x and %y go to fields 0 and 1 of an 8-byte-aligned buffer passed to @wrapped.
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y) #9
+ ret void
+}
+
+define dso_local void @i32_libcS(i32 noundef %x, ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %0, i64 24, i1 false)
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: %x is stored to field 0 and the byval struct is copied by a 24-byte memcpy into field 2 of an 8-byte-aligned buffer (field 1 is presumably padding; this buffer type is not among those listed at the top of the file).
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 4 %0) #9
+ ret void
+}
+
+define dso_local void @libcS_i32(ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %0, i64 24, i1 false)
+; CHECK-NEXT: %2 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: the byval struct is copied by a 24-byte memcpy into field 0 and %y is stored to field 1 of an 8-byte-aligned buffer passed to @wrapped.
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 4 %0, i32 noundef %y) #9
+ ret void
+}
+
+define dso_local void @i32_v4f32(i32 noundef %x, <4 x float> inreg noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> inreg noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: %x goes to field 0 and the inreg vector %y to field 2, skipping the [12 x i8] padding field that places the vector at a 16-byte offset.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> inreg noundef %y) #9
+ ret void
+}
+
+define dso_local void @v4f32_i32(<4 x float> inreg noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> inreg noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: with the vector first no padding field is needed; %x and %y go to fields 0 and 1 of a 16-byte-aligned buffer passed to @wrapped.
+entry:
+ tail call void (...) @vararg(<4 x float> inreg noundef %x, i32 noundef %y) #9
+ ret void
+}
+
+define dso_local void @i32_v8f32(i32 noundef %x, <8 x float> inreg noundef %y) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> inreg noundef %y) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+; Expected rewrite: %x goes to field 0 and the inreg vector %y to field 2, skipping the [28 x i8] padding field that places the vector at a 32-byte offset.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <8 x float> inreg noundef %y) #9
+ ret void
+}
+
+define dso_local void @v8f32_i32(<8 x float> inreg noundef %x, i32 noundef %y) local_unnamed_addr #5 {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> inreg noundef %x, i32 noundef %y) local_unnamed_addr #5 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<8 x float> inreg noundef %x, i32 noundef %y) #9
+ ret void
+}
+
+define dso_local void @i32_v16f32(i32 noundef %x, <16 x float> inreg noundef %y) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> inreg noundef %y) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <16 x float> inreg noundef %y) #9
+ ret void
+}
+
+define dso_local void @v16f32_i32(<16 x float> inreg noundef %x, i32 noundef %y) local_unnamed_addr #6 {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> inreg noundef %x, i32 noundef %y) local_unnamed_addr #6 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<16 x float> inreg noundef %x, i32 noundef %y) #9
+ ret void
+}
+
+define dso_local void @i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 4
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128
+; CHECK-NEXT: %y = load <32 x float>, ptr %0, align 128, !tbaa !7
+; CHECK-NEXT: store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !7
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store ptr %indirect-arg-temp, ptr %2, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 128
+ %y = load <32 x float>, ptr %0, align 128, !tbaa !8
+ store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !8
+ call void (...) @vararg(i32 noundef %x, ptr noundef nonnull %indirect-arg-temp) #9
+ ret void
+}
+
+define dso_local void @v32f32_i32(ptr nocapture noundef readonly %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(ptr nocapture noundef readonly %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 4
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128
+; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !7
+; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !7
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store ptr %indirect-arg-temp, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 128
+ %x = load <32 x float>, ptr %0, align 128, !tbaa !8
+ store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !8
+ call void (...) @vararg(ptr noundef nonnull %indirect-arg-temp, i32 noundef %y) #9
+ ret void
+}
+
+attributes #0 = { nounwind "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #5 = { nounwind "min-legal-vector-width"="256" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #6 = { nounwind "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #7 = { nounwind }
+attributes #8 = { nobuiltin nounwind "no-builtins" }
+attributes #9 = { nobuiltin "no-builtins" }
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"NumRegisterParameters", i32 0}
+!1 = !{i32 1, !"wchar_size", i32 2}
+!2 = !{i32 1, !"MaxTLSAlign", i32 65536}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"any pointer", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!6, !6, i64 0}
diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll b/llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll
new file mode 100644
index 00000000000000..ee6804e43dcbf2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.4.0"
+
+; The types show the call frames
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+
+%struct.__va_list_tag = type { i32, i32, ptr, ptr }
+%struct.libcS = type { i8, i16, i32, i64, float, double }
+
+define void @codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %cp = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #6
+; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr %x)
+; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %cp) #7
+; CHECK-NEXT: call void @llvm.va_end(ptr %cp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #6
+; CHECK-NEXT: ret void
+;
+entry:
+ %cp = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #5
+ call void @llvm.va_copy(ptr nonnull %cp, ptr %x)
+ call void @wrapped(ptr noundef nonnull %cp) #6
+ call void @llvm.va_end(ptr %cp)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #5
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+declare void @llvm.va_copy(ptr, ptr) #2
+
+declare void @wrapped(ptr noundef) local_unnamed_addr #3
+
+declare void @llvm.va_end(ptr) #2
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+define void @vararg(...) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@vararg(...) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #6
+; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va)
+; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %va) #7
+; CHECK-NEXT: call void @llvm.va_end(ptr %va)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #6
+; CHECK-NEXT: ret void
+;
+entry:
+ %va = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #5
+ call void @llvm.va_start(ptr nonnull %va)
+ call void @wrapped(ptr noundef nonnull %va) #6
+ call void @llvm.va_end(ptr %va)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #5
+ ret void
+}
+
+declare void @llvm.va_start(ptr) #2
+
+define void @single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x) #7
+ ret void
+}
+
+define void @single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x) #7
+ ret void
+}
+
+define void @single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x) #7
+ ret void
+}
+
+define void @single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 16
+; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <8 x float>, align 16
+ %x = load <8 x float>, ptr %0, align 16, !tbaa !4
+ store <8 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<8 x float>) align 16 %indirect-arg-temp) #7
+ ret void
+}
+
+define void @single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 16
+; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <16 x float>, align 16
+ %x = load <16 x float>, ptr %0, align 16, !tbaa !4
+ store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<16 x float>) align 16 %indirect-arg-temp) #7
+ ret void
+}
+
+define void @single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 16
+; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 16
+ %x = load <32 x float>, ptr %0, align 16, !tbaa !4
+ store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 16 %indirect-arg-temp) #7
+ ret void
+}
+
+define void @i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y) #7
+ ret void
+}
+
+define void @double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y) #7
+ ret void
+}
+
+define void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 32, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y) #7
+ ret void
+}
+
+define void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 32, i1 false)
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y) #7
+ ret void
+}
+
+define void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) #7
+ ret void
+}
+
+define void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y) #7
+ ret void
+}
+
+define void @i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 16
+; CHECK-NEXT: %y = load <8 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <8 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 32, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <8 x float>, align 16
+ %y = load <8 x float>, ptr %0, align 16, !tbaa !4
+ store <8 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<8 x float>) align 16 %indirect-arg-temp) #7
+ ret void
+}
+
+define void @v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 16
+; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false)
+; CHECK-NEXT: %2 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: the byval <8 x float> is copied into %indirect-arg-temp, then that copy (byval) and i32 %y are passed to variadic @vararg. Expected output above: a 32-byte memcpy puts the vector in field 0 of %vararg_buffer, %y is stored to field 1, and @wrapped is called directly with a hand-built va_list whose overflow area pointer is %vararg_buffer.
+entry:
+ %indirect-arg-temp = alloca <8 x float>, align 16
+ %x = load <8 x float>, ptr %0, align 16, !tbaa !4
+ store <8 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<8 x float>) align 16 %indirect-arg-temp, i32 noundef %y) #7
+ ret void
+}
+
+define void @i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 16
+; CHECK-NEXT: %y = load <16 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <16 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 64, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: i32 %x and a byval copy of the <16 x float> are passed to variadic @vararg. Expected output above: %x is stored to field 0 and the vector is memcpy'd (64 bytes) to field 2 of a 64-byte-aligned %vararg_buffer; field 1 is skipped, presumably padding (the frame type is declared outside this excerpt, so confirm there).
+entry:
+ %indirect-arg-temp = alloca <16 x float>, align 16
+ %y = load <16 x float>, ptr %0, align 16, !tbaa !4
+ store <16 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<16 x float>) align 16 %indirect-arg-temp) #7
+ ret void
+}
+
+define void @v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 16
+; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false)
+; CHECK-NEXT: %2 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: a byval copy of the <16 x float> comes first, followed by i32 %y, in the variadic call. Expected output above: the vector is memcpy'd (64 bytes) to field 0 and %y is stored to the adjacent field 1 of %vararg_buffer; the va_list handed to @wrapped points its overflow area at that buffer.
+entry:
+ %indirect-arg-temp = alloca <16 x float>, align 16
+ %x = load <16 x float>, ptr %0, align 16, !tbaa !4
+ store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<16 x float>) align 16 %indirect-arg-temp, i32 noundef %y) #7
+ ret void
+}
+
+define void @i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 16
+; CHECK-NEXT: %y = load <32 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <32 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 128, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: i32 %x, then a byval copy of the <32 x float>, passed to variadic @vararg. Expected output above: %vararg_buffer is aligned to 128 even though the byval temp is only align 16; %x lands in field 0 and the 128-byte memcpy targets field 2 (field 1 is skipped, presumably padding; the frame type is declared outside this excerpt).
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 16
+ %y = load <32 x float>, ptr %0, align 16, !tbaa !4
+ store <32 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<32 x float>) align 16 %indirect-arg-temp) #7
+ ret void
+}
+
+define void @v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 16
+; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 16, !tbaa !3
+; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false)
+; CHECK-NEXT: %2 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: a byval copy of the <32 x float> followed by i32 %y is passed to variadic @vararg. Expected output above: the 128 vector bytes are memcpy'd to field 0 and %y is stored to field 1 of %vararg_buffer, after which @wrapped receives a va_list whose overflow area pointer is that buffer and whose reg_save_area is null.
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 16
+ %x = load <32 x float>, ptr %0, align 16, !tbaa !4
+ store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 16 %indirect-arg-temp, i32 noundef %y) #7
+ ret void
+}
+
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" }
+attributes #5 = { nounwind }
+attributes #6 = { nobuiltin nounwind "no-builtins" }
+attributes #7 = { nobuiltin "no-builtins" } ; group referenced by the input's variadic call sites (tail call ... @vararg(...) #7); the expected output's replacement calls to @wrapped carry #8 instead
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!4 = !{!5, !5, i64 0} ; TBAA tag used as !tbaa !4 by the input's vector load/store pairs; this list defines no !3, yet the expected-output lines name the tag !3 (presumably renumbered when the module is printed)
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
new file mode 100644
index 00000000000000..cf8d98f1e30832
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The types show the call frames: one packed struct per caller, with an explicit [N x i8] padding field wherever needed to place the next argument at a multiple of its own size (e.g. double at offset 8, <4 x float> at offset 16)
+; CHECK: %single_i32.vararg = type <{ i32 }>
+; CHECK: %single_double.vararg = type <{ double }>
+; CHECK: %single_v4f32.vararg = type <{ <4 x float> }>
+; CHECK: %single_v8f32.vararg = type <{ <8 x float> }>
+; CHECK: %single_v16f32.vararg = type <{ <16 x float> }>
+; CHECK: %single_v32f32.vararg = type <{ <32 x float> }>
+; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }>
+; CHECK: %double_i32.vararg = type <{ double, i32 }>
+; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }>
+; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }>
+; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }>
+; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }>
+; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }>
+; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }>
+; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }>
+; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }>
+
+%struct.__va_list_tag = type { i32, i32, ptr, ptr } ; element type of the [1 x ...] va_list allocas in @codegen_for_copy and @vararg; the va_list built in the expected output uses the same field list as a literal struct
+%struct.libcS = type { i8, i16, i32, i64, float, double } ; passed byval by @i32_libcS and @libcS_i32; the expected output copies it with a 32-byte memcpy
+
+define dso_local void @codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %cp = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #6
+; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr %x)
+; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %cp) #7
+; CHECK-NEXT: call void @llvm.va_end(ptr %cp)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #6
+; CHECK-NEXT: ret void
+; Input IR below contains no variadic call: it va_copy's the incoming list %x into %cp, passes %cp to @wrapped and va_end's it. The expected output above is the same instruction sequence; only the attribute-group numbers differ (#5 becomes #6, #6 becomes #7).
+entry:
+ %cp = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #5
+ call void @llvm.va_copy(ptr nonnull %cp, ptr %x)
+ call void @wrapped(ptr noundef nonnull %cp) #6
+ call void @llvm.va_end(ptr %cp)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #5
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+declare void @llvm.va_copy(ptr, ptr) #2 ; called by @codegen_for_copy to duplicate its incoming va_list
+
+declare void @wrapped(ptr noundef) local_unnamed_addr #3 ; declared without a body in this excerpt; takes a va_list pointer and is the callee of every rewritten call in the expected output
+
+declare void @llvm.va_end(ptr) #2
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+define dso_local void @vararg(...) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@vararg(...) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %va = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #6
+; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va)
+; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %va) #7
+; CHECK-NEXT: call void @llvm.va_end(ptr %va)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #6
+; CHECK-NEXT: ret void
+; The variadic wrapper that the other test functions call: va_start into %va, forward %va to @wrapped, va_end. The expected output above keeps this definition as written (only attribute-group numbers shift); it is the call sites in the callers that are expected to change.
+entry:
+ %va = alloca [1 x %struct.__va_list_tag], align 16
+ call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #5
+ call void @llvm.va_start(ptr nonnull %va)
+ call void @wrapped(ptr noundef nonnull %va) #6
+ call void @llvm.va_end(ptr %va)
+ call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #5
+ ret void
+}
+
+declare void @llvm.va_start(ptr) #2 ; intrinsic called by @vararg to initialise its va_list; the rewritten callers in the expected output never call it
+
+define dso_local void @single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below is one variadic call passing i32 %x. Expected output above: %x is stored into a one-field %vararg_buffer, a va_list is filled in by hand (gp_offset 48, fp_offset 176, overflow area = %vararg_buffer, reg_save_area = null) and @wrapped is called directly. NOTE(review): 48 and 176 presumably mark the register save area as exhausted so va_arg only reads the overflow area - confirm against the SysV x86-64 ABI.
+entry:
+ tail call void (...) @vararg(i32 noundef %x) #7
+ ret void
+}
+
+define dso_local void @single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below is one variadic call passing a single double. Expected output above: the double is stored (align 8) into %vararg_buffer, whose frame type is <{ double }>, and the call becomes @wrapped(ptr %va_list) with the va_list's overflow area pointing at that buffer. The floating-point argument uses the same fp_offset value (176) as the integer-only callers in this test.
+entry:
+ tail call void (...) @vararg(double noundef %x) #7
+ ret void
+}
+
+define dso_local void @single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below passes a <4 x float> directly as an SSA value (not through a byval pointer) to variadic @vararg. Expected output above: the vector is written with a plain 16-byte-aligned store into a 16-byte-aligned %vararg_buffer, with no memcpy, before the direct call to @wrapped.
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x) #7
+ ret void
+}
+
+define dso_local void @single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32
+; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 32
+; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 32, !tbaa !3
+; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 32, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: the <8 x float> arrives byval, is copied to %indirect-arg-temp, and that temp is passed byval to variadic @vararg. Expected output above keeps the load/store pair and adds a 32-byte memcpy from the temp into the 32-byte-aligned %vararg_buffer, then calls @wrapped with the hand-built va_list.
+entry:
+ %indirect-arg-temp = alloca <8 x float>, align 32
+ %x = load <8 x float>, ptr %0, align 32, !tbaa !4
+ store <8 x float> %x, ptr %indirect-arg-temp, align 32, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<8 x float>) align 32 %indirect-arg-temp) #7
+ ret void
+}
+
+define dso_local void @single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64
+; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 64
+; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 64, !tbaa !3
+; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: a byval <16 x float> is copied to %indirect-arg-temp and the temp is passed byval as the only variadic argument. Expected output above: the 64 bytes are memcpy'd from the temp into field 0 of a 64-byte-aligned %vararg_buffer, which becomes the overflow area of the va_list given to @wrapped.
+entry:
+ %indirect-arg-temp = alloca <16 x float>, align 64
+ %x = load <16 x float>, ptr %0, align 64, !tbaa !4
+ store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<16 x float>) align 64 %indirect-arg-temp) #7
+ ret void
+}
+
+define dso_local void @single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128
+; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !3
+; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below: the largest single-vector case, a byval <32 x float> (128 bytes, align 128) copied to %indirect-arg-temp and passed byval to variadic @vararg. Expected output above: a 128-byte memcpy moves the temp into a 128-byte-aligned %vararg_buffer before @wrapped is called with the hand-built va_list.
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 128
+ %x = load <32 x float>, ptr %0, align 128, !tbaa !4
+ store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 %indirect-arg-temp) #7
+ ret void
+}
+
+define dso_local void @i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store double %y, ptr %1, align 8
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below passes (i32 %x, double %y) to variadic @vararg. Expected output above: %x goes to field 0 and %y to field 2 of %i32_double.vararg, which this test expects to be <{ i32, [4 x i8], double }>; field 1 is the explicit 4-byte padding that puts the double at offset 8.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, double noundef %y) #7
+ ret void
+}
+
+define dso_local void @double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store double %x, ptr %0, align 8
+; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below passes (double %x, i32 %y) to variadic @vararg. Expected output above: the two values occupy consecutive fields 0 and 1 of %double_i32.vararg, which this test expects to be <{ double, i32 }>; with the wider argument first there is no padding field between them.
+entry:
+ tail call void (...) @vararg(double noundef %x, i32 noundef %y) #7
+ ret void
+}
+
+define dso_local void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 32, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below passes i32 %x followed by a byval %struct.libcS (forwarded straight from the caller's own byval parameter %y, with no temporary). Expected output above: %x is stored to field 0 and the struct is memcpy'd (32 bytes) from %y into field 2; field 1 is skipped, presumably padding - the %i32_libcS.vararg layout is not among the frame types this test lists, so confirm against the pass output.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y) #7
+ ret void
+}
+
+define dso_local void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 8
+; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 32, i1 false)
+; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below passes a byval %struct.libcS (the caller's own byval parameter %x) followed by i32 %y to variadic @vararg. Expected output above: the struct is memcpy'd (32 bytes) from %x into field 0 and %y is stored to the adjacent field 1 of %vararg_buffer, then @wrapped is called with the hand-built va_list.
+entry:
+ tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y) #7
+ ret void
+}
+
+define dso_local void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %0, align 4
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+; Input IR below passes (i32 %x, <4 x float> %y), both as direct SSA values, to variadic @vararg. Expected output above: %x is stored to field 0 and %y (align 16) to field 2 of %i32_v4f32.vararg, which this test expects to be <{ i32, [12 x i8], <4 x float> }>; field 1 is the 12-byte padding that puts the vector at offset 16.
+entry:
+ tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) #7
+ ret void
+}
+
+; Test: forwards a <4 x float> (passed by value) followed by an i32 to the
+; variadic function @vararg - the argument order is the reverse of @i32_v4f32.
+; Expected output: the vector is stored at field 0 and the i32 at field 1 of a
+; 16-byte-aligned alloca of %v4f32_i32.vararg, then a
+; [1 x { i32, i32, ptr, ptr }] va_list is filled in (gp_offset 48, fp_offset 176,
+; third field = argument buffer, reg_save_area = null) and passed to @wrapped.
+define dso_local void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) local_unnamed_addr #4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16
+; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16
+; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %1, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y) #7
+ ret void
+}
+
+; Test: forwards an i32 followed by an <8 x float> to the variadic function
+; @vararg. The vector arrives byval (align 32); the input IR copies it into
+; %indirect-arg-temp and passes that temporary byval to @vararg.
+; Expected output: the temporary copy is kept, the i32 is stored at field 0 and
+; the vector is copied with a 32-byte memcpy into field 2 of a 32-byte-aligned
+; alloca of %i32_v8f32.vararg, then a [1 x { i32, i32, ptr, ptr }] va_list is
+; filled in (gp_offset 48, fp_offset 176, third field = argument buffer,
+; reg_save_area = null) and passed to @wrapped.
+; NOTE(review): the input uses !tbaa !4 while the expected output uses !tbaa !3;
+; presumably metadata is renumbered when the output is printed - confirm.
+define dso_local void @i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32
+; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 32
+; CHECK-NEXT: %y = load <8 x float>, ptr %0, align 32, !tbaa !3
+; CHECK-NEXT: store <8 x float> %y, ptr %indirect-arg-temp, align 32, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 32, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <8 x float>, align 32
+ %y = load <8 x float>, ptr %0, align 32, !tbaa !4
+ store <8 x float> %y, ptr %indirect-arg-temp, align 32, !tbaa !4
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<8 x float>) align 32 %indirect-arg-temp) #7
+ ret void
+}
+
+; Test: forwards an <8 x float> followed by an i32 to the variadic function
+; @vararg - the argument order is the reverse of @i32_v8f32. The vector arrives
+; byval (align 32) and is copied into %indirect-arg-temp before the call.
+; Expected output: the vector is copied with a 32-byte memcpy into field 0 and
+; the i32 is stored at field 1 of a 32-byte-aligned alloca of
+; %v8f32_i32.vararg, then a [1 x { i32, i32, ptr, ptr }] va_list is filled in
+; (gp_offset 48, fp_offset 176, third field = argument buffer,
+; reg_save_area = null) and passed to @wrapped.
+; NOTE(review): the input uses !tbaa !4 while the expected output uses !tbaa !3;
+; presumably metadata is renumbered when the output is printed - confirm.
+define dso_local void @v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32
+; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 32
+; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 32, !tbaa !3
+; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 32, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false)
+; CHECK-NEXT: %2 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <8 x float>, align 32
+ %x = load <8 x float>, ptr %0, align 32, !tbaa !4
+ store <8 x float> %x, ptr %indirect-arg-temp, align 32, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<8 x float>) align 32 %indirect-arg-temp, i32 noundef %y) #7
+ ret void
+}
+
+; Test: forwards an i32 followed by a <16 x float> to the variadic function
+; @vararg. The vector arrives byval (align 64); the input IR copies it into
+; %indirect-arg-temp and passes that temporary byval to @vararg.
+; Expected output: the i32 is stored at field 0 and the vector is copied with a
+; 64-byte memcpy into field 2 of a 64-byte-aligned alloca of
+; %i32_v16f32.vararg, then a [1 x { i32, i32, ptr, ptr }] va_list is filled in
+; (gp_offset 48, fp_offset 176, third field = argument buffer,
+; reg_save_area = null) and passed to @wrapped.
+; NOTE(review): the input uses !tbaa !4 while the expected output uses !tbaa !3;
+; presumably metadata is renumbered when the output is printed - confirm.
+define dso_local void @i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64
+; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 64
+; CHECK-NEXT: %y = load <16 x float>, ptr %0, align 64, !tbaa !3
+; CHECK-NEXT: store <16 x float> %y, ptr %indirect-arg-temp, align 64, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 64, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <16 x float>, align 64
+ %y = load <16 x float>, ptr %0, align 64, !tbaa !4
+ store <16 x float> %y, ptr %indirect-arg-temp, align 64, !tbaa !4
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<16 x float>) align 64 %indirect-arg-temp) #7
+ ret void
+}
+
+; Test: forwards a <16 x float> followed by an i32 to the variadic function
+; @vararg - the argument order is the reverse of @i32_v16f32. The vector arrives
+; byval (align 64) and is copied into %indirect-arg-temp before the call.
+; Expected output: the vector is copied with a 64-byte memcpy into field 0 and
+; the i32 is stored at field 1 of a 64-byte-aligned alloca of
+; %v16f32_i32.vararg, then a [1 x { i32, i32, ptr, ptr }] va_list is filled in
+; (gp_offset 48, fp_offset 176, third field = argument buffer,
+; reg_save_area = null) and passed to @wrapped.
+; NOTE(review): the input uses !tbaa !4 while the expected output uses !tbaa !3;
+; presumably metadata is renumbered when the output is printed - confirm.
+define dso_local void @v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64
+; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 64
+; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 64, !tbaa !3
+; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false)
+; CHECK-NEXT: %2 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <16 x float>, align 64
+ %x = load <16 x float>, ptr %0, align 64, !tbaa !4
+ store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<16 x float>) align 64 %indirect-arg-temp, i32 noundef %y) #7
+ ret void
+}
+
+; Test: forwards an i32 followed by a <32 x float> to the variadic function
+; @vararg. The vector arrives byval (align 128); the input IR copies it into
+; %indirect-arg-temp and passes that temporary byval to @vararg.
+; Expected output: the i32 is stored at field 0 and the vector is copied with a
+; 128-byte memcpy into field 2 of a 128-byte-aligned alloca of
+; %i32_v32f32.vararg, then a [1 x { i32, i32, ptr, ptr }] va_list is filled in
+; (gp_offset 48, fp_offset 176, third field = argument buffer,
+; reg_save_area = null) and passed to @wrapped.
+; NOTE(review): the input uses !tbaa !4 while the expected output uses !tbaa !3;
+; presumably metadata is renumbered when the output is printed - confirm.
+define dso_local void @i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128
+; CHECK-NEXT: %y = load <32 x float>, ptr %0, align 128, !tbaa !3
+; CHECK-NEXT: store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: store i32 %x, ptr %1, align 4
+; CHECK-NEXT: %2 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 128, i1 false)
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 128
+ %y = load <32 x float>, ptr %0, align 128, !tbaa !4
+ store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !4
+ tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<32 x float>) align 128 %indirect-arg-temp) #7
+ ret void
+}
+
+; Test: forwards a <32 x float> followed by an i32 to the variadic function
+; @vararg - the argument order is the reverse of @i32_v32f32. The vector arrives
+; byval (align 128) and is copied into %indirect-arg-temp before the call.
+; Expected output: the vector is copied with a 128-byte memcpy into field 0 and
+; the i32 is stored at field 1 of a 128-byte-aligned alloca of
+; %v32f32_i32.vararg, then a [1 x { i32, i32, ptr, ptr }] va_list is filled in
+; (gp_offset 48, fp_offset 176, third field = argument buffer,
+; reg_save_area = null) and passed to @wrapped.
+; NOTE(review): the input uses !tbaa !4 while the expected output uses !tbaa !3;
+; presumably metadata is renumbered when the output is printed - confirm.
+define dso_local void @v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0, i32 noundef %y) local_unnamed_addr #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128
+; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128
+; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !3
+; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !3
+; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false)
+; CHECK-NEXT: %2 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1
+; CHECK-NEXT: store i32 %y, ptr %2, align 4
+; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8
+; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0
+; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4
+; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1
+; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4
+; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2
+; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8
+; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3
+; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8
+; CHECK-NEXT: call void @wrapped(ptr %va_list) #8
+; CHECK-NEXT: ret void
+;
+entry:
+ %indirect-arg-temp = alloca <32 x float>, align 128
+ %x = load <32 x float>, ptr %0, align 128, !tbaa !4
+ store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !4
+ tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 %indirect-arg-temp, i32 noundef %y) #7
+ ret void
+}
+
+; Attribute groups for the input IR of this test.
+; In the functions visible in this part of the file: #0 is on the struct and
+; byval-vector tests, #4 is on the <4 x float> tests, and #7 is on every call to
+; @vararg. #0 and #4 differ only in "min-legal-vector-width" (0 vs 128).
+; NOTE(review): the expected output references #8 on the calls to @wrapped, which
+; is not one of the groups defined here; presumably it is created when the
+; transformed module is printed - confirm.
+attributes #0 = { nounwind "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #5 = { nounwind }
+attributes #6 = { nobuiltin nounwind "no-builtins" }
+attributes #7 = { nobuiltin "no-builtins" }
+
+; Module flags (!0-!2) and the TBAA nodes (!4-!6) used by the loads and stores of
+; the byval-vector tests in the input IR. No !3 is defined here; the input
+; references !tbaa !4, whereas the expected output lines reference !tbaa !3.
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
index 2003e86e90b96d..fc82a4f97dbb10 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
@@ -33,6 +33,7 @@ static_library("IPO") {
"DeadArgumentElimination.cpp",
"ElimAvailExtern.cpp",
"EmbedBitcodePass.cpp",
+ "ExpandVariadics.cpp",
"ExtractGV.cpp",
"ForceFunctionAttrs.cpp",
"FunctionAttrs.cpp",
More information about the llvm-commits
mailing list