[clang] [llvm] [transforms] Inline simple variadic functions (PR #81058)

Wed Feb 7 15:50:09 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Jon Chesterfield (JonChesterfield)

<details>
<summary>Changes</summary>

This is a MVP style commit for eliminating variadic functions as an IR pass. It replaces the ... with a va_list value.

The implementation strategy is to divide the transform into orthogonal parts that can be tested independently. In particular this seeks to decouple target specific complexity from llvm modelling complexity. The original motivation was to provide variadic function support to amdgpu and nvptx. That's tested and working in the downstream branch from which this patch was extracted.

This commit introduces the target specific part of the transform, implemented for 32 and 64 bit x86. Test coverage of more interesting types from clang requires either pull/80002 or complicated filecheck patterns. It rewrites very simple variadic calls into one to an equivalent function taking a va_list. 

Subsequent patches are needed to:
1. rewrite variadic functions that don't happen to already match this pattern
2. add more targets - aarch64, amdgpu, nvptx are planned at present
3. call the pass from codegen to expand all variadic functions for GPU targets
4. close the holes in test coverage (and implementation)
5. add this transform to one of the optimisation pipelines, maybe O1
6. maybe expand va_arg instructions (for the GPUs, or maybe from alternative front ends)

This patch is known to be incomplete. It is posted in this form to check the strategy and provide a mostly-correct baseline to extend. Initial discussion is at https://discourse.llvm.org/t/rfc-desugar-variadics-codegen-for-new-targets-optimisation-for-existing/72854

---

Patch is 176.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81058.diff


16 Files Affected:

- (added) clang/test/CodeGen/expand-variadic-call.c (+273) 
- (added) clang/test/CodeGen/variadic-wrapper-removal.c (+86) 
- (added) clang/test/CodeGenCXX/inline-then-fold-variadics.cpp (+117) 
- (modified) llvm/include/llvm/CodeGen/Passes.h (+4) 
- (modified) llvm/include/llvm/InitializePasses.h (+1) 
- (added) llvm/include/llvm/Transforms/IPO/ExpandVariadics.h (+17) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1) 
- (modified) llvm/lib/Passes/PassRegistry.def (+1) 
- (modified) llvm/lib/Transforms/IPO/CMakeLists.txt (+1) 
- (added) llvm/lib/Transforms/IPO/ExpandVariadics.cpp (+723) 
- (added) llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll (+385) 
- (added) llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll (+385) 
- (added) llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll (+402) 
- (added) llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll (+589) 
- (added) llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll (+589) 
- (modified) llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn (+1) 


``````````diff

diff --git a/clang/test/CodeGen/expand-variadic-call.c b/clang/test/CodeGen/expand-variadic-call.c
new file mode 100644
index 00000000000000..fa2b984bec08a5
--- /dev/null
+++ b/clang/test/CodeGen/expand-variadic-call.c
@@ -0,0 +1,273 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-cpu x86-64-v4 -std=c23 -O1 -ffreestanding -emit-llvm -o - %s | FileCheck %s
+
+// This test sanity checks calling a variadic function with the expansion transform disabled.
+// The IR test cases {arch}/expand-variadic-call-*.ll correspond to IR generated from this test case.
+
+typedef __builtin_va_list va_list;
+#define va_copy(dest, src) __builtin_va_copy(dest, src)
+#define va_start(ap, ...) __builtin_va_start(ap, 0)
+#define va_end(ap) __builtin_va_end(ap)
+#define va_arg(ap, type) __builtin_va_arg(ap, type)
+
+// 32 bit x86 alignment uses getTypeStackAlign for special cases
+// Whitebox testing.
+// Needs a type >= 16 which is either a simd or a struct containing a simd
+// darwinvectorabi should force 4 bytes
+// linux vectors with align 16/32/64 return that alignment
+
+
+void wrapped(va_list);
+
+// CHECK-LABEL: @codegen_for_copy(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CP:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[CP]]) #[[ATTR7:[0-9]+]]
+// CHECK-NEXT:    call void @llvm.va_copy(ptr nonnull [[CP]], ptr [[X:%.*]])
+// CHECK-NEXT:    call void @wrapped(ptr noundef nonnull [[CP]]) #[[ATTR8:[0-9]+]]
+// CHECK-NEXT:    call void @llvm.va_end(ptr [[CP]])
+// CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[CP]]) #[[ATTR7]]
+// CHECK-NEXT:    ret void
+//
+void codegen_for_copy(va_list x)
+{
+  va_list cp;
+  va_copy(cp, x);
+  wrapped(cp);
+  va_end(cp);
+}
+
+
+// CHECK-LABEL: @vararg(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[VA]]) #[[ATTR7]]
+// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VA]])
+// CHECK-NEXT:    call void @wrapped(ptr noundef nonnull [[VA]]) #[[ATTR8]]
+// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[VA]]) #[[ATTR7]]
+// CHECK-NEXT:    ret void
+//
+ void vararg(...)
+{
+  va_list va;
+  __builtin_va_start(va, 0);
+  wrapped(va);
+  va_end(va);
+}
+
+// vectors with alignment 16/32/64 are natively aligned on linux x86
+// v32f32 would be a m1024 type, larger than x64 defines at time of writing
+typedef int i32;
+typedef float v4f32 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef float v8f32 __attribute__((__vector_size__(32), __aligned__(32)));
+typedef float v16f32 __attribute__((__vector_size__(64), __aligned__(64)));
+typedef float v32f32 __attribute__((__vector_size__(128), __aligned__(128)));
+
+
+// Pass a single value to wrapped() via vararg(...)
+// CHECK-LABEL: @single_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(i32 noundef [[X:%.*]]) #[[ATTR9:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+void single_i32(i32 x)
+{
+  vararg(x);
+}
+
+// CHECK-LABEL: @single_double(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(double noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void single_double(double x)
+{
+  vararg(x);
+}
+
+// CHECK-LABEL: @single_v4f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(<4 x float> noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void single_v4f32(v4f32 x)
+{
+  vararg(x);
+}
+
+// CHECK-LABEL: @single_v8f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(<8 x float> noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void single_v8f32(v8f32 x)
+{
+  vararg(x);
+}
+
+// CHECK-LABEL: @single_v16f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(<16 x float> noundef [[X:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void single_v16f32(v16f32 x)
+{
+  vararg(x);
+}
+
+// CHECK-LABEL: @single_v32f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128
+// CHECK-NEXT:    [[X:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    store <32 x float> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT:    tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void single_v32f32(v32f32 x)
+{
+  vararg(x);
+}
+
+
+
+// CHECK-LABEL: @i32_double(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void i32_double(i32 x, double y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @double_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void double_i32(double x, i32 y)
+{
+  vararg(x, y);
+}
+
+
+// A struct used by libc variadic tests
+
+typedef struct {
+  char c;
+  short s;
+  int i;
+  long l;
+  float f;
+  double d;
+}  libcS;
+
+// CHECK-LABEL: @i32_libcS(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(i32 noundef [[X:%.*]], ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void i32_libcS(i32 x, libcS y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @libcS_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void libcS_i32(libcS x, i32 y)
+{
+  vararg(x, y);
+}
+
+
+// CHECK-LABEL: @i32_v4f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(i32 noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void i32_v4f32(i32 x, v4f32 y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @v4f32_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(<4 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void v4f32_i32(v4f32 x, i32 y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @i32_v8f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(i32 noundef [[X:%.*]], <8 x float> noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void i32_v8f32(i32 x, v8f32 y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @v8f32_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(<8 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void v8f32_i32(v8f32 x, i32 y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @i32_v16f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(i32 noundef [[X:%.*]], <16 x float> noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void i32_v16f32(i32 x, v16f32 y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @v16f32_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void (...) @vararg(<16 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void v16f32_i32(v16f32 x, i32 y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @i32_v32f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128
+// CHECK-NEXT:    [[Y:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT:    store <32 x float> [[Y]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT:    tail call void (...) @vararg(i32 noundef [[X:%.*]], ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void i32_v32f32(i32 x, v32f32 y)
+{
+  vararg(x, y);
+}
+
+// CHECK-LABEL: @v32f32_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128
+// CHECK-NEXT:    [[X:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT:    store <32 x float> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]]
+// CHECK-NEXT:    tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]], i32 noundef [[Y:%.*]]) #[[ATTR9]]
+// CHECK-NEXT:    ret void
+//
+void v32f32_i32(v32f32 x, i32 y)
+{
+  vararg(x, y);
+}
diff --git a/clang/test/CodeGen/variadic-wrapper-removal.c b/clang/test/CodeGen/variadic-wrapper-removal.c
new file mode 100644
index 00000000000000..da41dde16f3d73
--- /dev/null
+++ b/clang/test/CodeGen/variadic-wrapper-removal.c
@@ -0,0 +1,86 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+
+// neither arm arch is implemented yet, leaving it here as a reminder
+// armv6 is a ptr as far as the struct is concerned, but possibly also a [1 x i32] passed by value
+// that seems insistent, maybe leave 32 bit arm alone for now
+// aarch64 is a struct of five things passed using byval memcpy
+
+// R-N: %clang_cc1 -triple=armv6-none--eabi -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+
+// R-N: %clang_cc1 -triple=aarch64-none-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s
+
+
+
+// expand-variadics rewrites calls to variadic functions into calls to
+// equivalent functions that take a va_list argument. A property of the
+// implementation is that said "equivalent function" may be a pre-existing one.
+// This is equivalent to inlining a sufficiently simple variadic wrapper.
+
+#include <stdarg.h>
+
+typedef int FILE; // close enough for this test
+
+// fprintf is sometimes implemented as a call to vfprintf. That fits the
+// pattern the transform pass recognises - given an implementation of fprintf
+// in the IR module, calls to it can be rewritten into calls into vfprintf.
+
+// CHECK-LABEL: define{{.*}} i32 @fprintf(
+// CHECK-LABEL: define{{.*}} i32 @call_fprintf(
+// CHECK-NOT:   @fprintf
+// CHECK:       @vfprintf
+int vfprintf(FILE *restrict f, const char *restrict fmt, va_list ap);
+int fprintf(FILE *restrict f, const char *restrict fmt, ...)
+{
+  int ret;
+  va_list ap;
+  va_start(ap, fmt);
+  ret = vfprintf(f, fmt, ap);
+  va_end(ap);
+  return ret;
+}
+int call_fprintf(FILE *f)
+{
+  int x = 42;
+  double y = 3.14;
+  return fprintf(f, "int %d dbl %g\n", x, y);
+}
+
+// Void return type is also OK
+
+// CHECK-LABEL: define{{.*}} void @no_result(
+// CHECK-LABEL: define{{.*}} void @call_no_result(
+// CHECK-NOT:   @no_result
+// CHECK:       @vno_result
+void vno_result(const char * fmt, va_list);
+void no_result(const char * fmt, ...)
+{
+  va_list ap;
+  va_start(ap, fmt);
+  vno_result(fmt, ap);
+  va_end(ap);
+}
+void call_no_result(FILE *f)
+{
+  int x = 101;
+  no_result("", x);
+}
+
+// The vaend in the forwarding implementation is optional where it's a no-op
+
+// CHECK-LABEL: define{{.*}} i32 @no_vaend(
+// CHECK-LABEL: define{{.*}} i32 @call_no_vaend(
+// CHECK-NOT:   @no_vaend
+// CHECK:       @vno_vaend
+int vno_vaend(int x, va_list);
+int no_vaend(int x, ...)
+{
+  va_list ap;
+  va_start(ap, x);
+  return vno_vaend(x, ap);
+}
+int call_no_vaend(int x)
+{
+  return no_vaend(x, 10, 2.5f);
+}
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
new file mode 100644
index 00000000000000..cf436ead77a2cb
--- /dev/null
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X86Linux
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X64SystemV
+
+// RUN: %clang_cc1 -triple i386-apple-darwin -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X86Darwin
+
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X64SystemV
+
+// RUN: %clang_cc1 -triple i686-windows-msvc -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK,X86Windows
+
+// 64 bit windows va_arg passes most types indirectly but the call instruction uses the types by value
+// ___: %clang_cc1 -triple x86_64-pc-windows-msvc -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default<O1>" -S | FileCheck %s --check-prefixes=CHECK
+
+// Checks for consistency between clang and expand-variadics
+// 1. Use clang to lower va_arg
+// 2. Use expand-variadics to lower the rest of the variadic operations
+// 3. Use opt -O1 to simplify the result for simpler filecheck patterns
+// The simplification will fail when the two are not consistent, modulo bugs elsewhere.
+
+#include <stdarg.h>
+
+// This test can be simplified when expand-variadics is extended to apply to more patterns.
+// The first_valist and second_valist functions can then be inlined, either in the test or
+// by enabling optimisaton passes in the clang invocation.
+// The explicit instcombine pass canonicalises the variadic function IR.
+
+// More complicated tests need instcombine of ptrmask to land first.
+
+template <typename X, typename Y>
+static X first_valist(va_list va) {
+  return va_arg(va, X);
+}
+
+template <typename X, typename Y>
+static X first(...) {
+  va_list va;
+  __builtin_va_start(va, 0);
+  return first_valist<X,Y>(va);
+}
+
+template <typename X, typename Y>
+static Y second_valist(va_list va) {
+  va_arg(va, X);
+  Y r = va_arg(va, Y);
+  return r;
+}
+
+
+template <typename X, typename Y>
+static Y second(...) {
+  va_list va;
+  __builtin_va_start(va, 0);
+  return second_valist<X,Y>(va);
+}
+
+extern "C"
+{
+// CHECK-LABEL: define{{.*}} i32 @first_i32_i32(i32{{.*}} %x, i32{{.*}} %y)
+// CHECK:       entry:
+// CHECK:       ret i32 %x
+int first_i32_i32(int x, int y)
+{
+  return first<int,int>(x, y);
+}
+
+// CHECK-LABEL: define{{.*}} i32 @second_i32_i32(i32{{.*}} %x, i32{{.*}} %y)
+// CHECK:       entry:
+// CHECK:       ret i32 %y
+int second_i32_i32(int x, int y)
+{
+  return second<int,int>(x, y);
+}
+}
+
+// Permutations of an int and a double
+extern "C"
+{
+// CHECK-LABEL: define{{.*}} i32 @first_i32_f64(i32{{.*}} %x, double{{.*}} %y)
+// CHECK:       entry:
+// CHECK:       ret i32 %x
+int first_i32_f64(int x, double y)
+{
+  return first<int,double>(x, y);
+}
+  
+// CHECK-LABEL: define{{.*}} double @second_i32_f64(i32{{.*}} %x, double{{.*}} %y)
+// CHECK:       entry:
+
+// X86Linux:    ret double %y
+// X64SystemV:  ret double %y
+// X86Darwin:   ret double %y
+// X86Windows:  [[TMP0:%.*]] = alloca <{ [4 x i8], double }>, align 4
+// X86Windows:  [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
+// X86Windows:  store double %y, ptr [[TMP1]], align 4
+// X86Windows:  [[TMP2:%.*]] = load double, ptr [[TMP0]], align 4
+// X86Windows:  ret double [[TMP2]]
+double second_i32_f64(int x, double y)
+{
+  return second<int,double>(x, y);
+}
+
+// CHECK-LABEL: define{{.*}} double @first_f64_i32(double{{.*}} %x, i32{{.*}} %y)
+// CHECK:       entry:
+// CHECK:       ret double %x
+double first_f64_i32(double x, int y)
+{
+  return first<double,int>(x, y);
+}
+
+// CHECK-LABEL: define{{.*}} i32 @second_f64_i32(double{{.*}} %x, i32{{.*}} %y)
+// CHECK:       entry:
+// CHECK:       ret i32 %y
+int second_f64_i32(double x, int y)
+{
+  return second<double,int>(x, y);
+}   
+}
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index bbfb8a0dbe26a4..fe3208df7a23b2 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -600,6 +600,10 @@ namespace llvm {
 
   /// Lowers KCFI operand bundles for indirect calls.
   FunctionPass *createKCFIPass();
+
+  // Inline variadic functions and expand variadic intrinsics.
+  ModulePass *createExpandVariadicsPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 3db639a6872407..6487d0a5e26d1b 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
 void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExpandReductionsPass(PassRegistry&);
+void initializeExpandVariadicsPass(PassRegistry &);
 void initializeExpandVectorPredicationPass(PassRegistry &);
 void initializeExternalAAWrapperPassPass(PassRegistry&);
 void initializeFEntryInserterPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
new file mode 100644
index 00000000000000..e7ffe343b940e9
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -0,0 +1,17 @@
+#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c934ec42f6eb15..624fffd233ce56 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -131,6 +131,7 @@
 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 44511800ccff8d..4ea9493208315a 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
 MODULE_PASS("dxil-upgrade", DXILUpgradePass())
 MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
 MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
+MODULE_PASS("expand-variadics", ExpandVariadicsPass())
 MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
 MODULE_PASS("function-import", FunctionImportPass())
 MODULE_PASS("globalopt", GlobalOptPass())
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 034f1587ae8df4..b8bd0be91d2232 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
   DeadArgumentElimination.cpp
   ElimAvailExtern.cpp
   EmbedBitcodePass.cpp
+  ExpandVariadics.cpp
   ExtractGV.cpp
   ForceFunctionAttrs.cpp
   FunctionAttrs.cpp
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
new file mode 100644
index 00000000000000..4b4c635cef5092
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -0,0 +1,723 @@
+//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// S...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/81058