[clang] [llvm] [CodeGen][LLVM] Make the `va_list` related intrinsics generic. (PR #85460)

Alex Voicu via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 15 13:25:50 PDT 2024


https://github.com/AlexVlx created https://github.com/llvm/llvm-project/pull/85460

Currently, the intrinsics used for implementing `va_list` handling unconditionally take their arguments as unqualified `ptr`s, i.e. pointers to address space (AS) 0. This does not work for targets where the default AS is not 0, or where AS 0 is not a viable AS (for example, a target might choose 0 to represent the constant address space). This patch changes the intrinsics' signatures to take generic `anyptr` arguments, which corrects this issue. The patch is noisy due to the number of tests affected. A test for an upstream target which does not use 0 as its default AS (SPIRV for HIP device compilations) is added as well.

>From bd5cde52a9e355643e2fb5b26278a2af47338891 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 15 Mar 2024 19:37:02 +0000
Subject: [PATCH] Make the `va_list` related intrinsics generic.

---
 clang/lib/CodeGen/CGBuiltin.cpp               |  7 ++-
 clang/test/CodeGen/CSKY/csky-abi.c            | 16 +++----
 clang/test/CodeGen/LoongArch/abi-lp64d.c      |  4 +-
 .../test/CodeGen/PowerPC/aix-altivec-vaargs.c |  4 +-
 clang/test/CodeGen/PowerPC/aix-vaargs.c       | 14 +++---
 .../CodeGen/PowerPC/ppc64le-varargs-f128.c    | 18 +++----
 clang/test/CodeGen/RISCV/riscv32-vararg.c     | 40 ++++++++--------
 clang/test/CodeGen/RISCV/riscv64-vararg.c     | 16 +++----
 clang/test/CodeGen/WebAssembly/wasm-varargs.c | 16 +++----
 clang/test/CodeGen/X86/va-arg-sse.c           |  4 +-
 clang/test/CodeGen/aarch64-ABI-align-packed.c | 14 +++---
 clang/test/CodeGen/aarch64-varargs.c          |  2 +-
 clang/test/CodeGen/arm-varargs.c              |  2 +-
 clang/test/CodeGen/hexagon-linux-vararg.c     |  2 +-
 clang/test/CodeGen/mips-varargs.c             | 16 +++----
 clang/test/CodeGen/pr53127.cpp                |  4 +-
 ...rargs-with-nonzero-default-address-space.c | 22 +++++++++
 clang/test/CodeGen/xcore-abi.c                |  2 +-
 clang/test/CodeGenCXX/ext-int.cpp             | 12 ++---
 clang/test/CodeGenCXX/ibm128-declarations.cpp |  4 +-
 clang/test/Modules/codegen.test               |  2 +-
 llvm/include/llvm/IR/Intrinsics.td            | 13 +++--
 llvm/test/Bitcode/compatibility-3.6.ll        | 16 +++----
 llvm/test/Bitcode/compatibility-3.7.ll        | 16 +++----
 llvm/test/Bitcode/compatibility-3.8.ll        | 16 +++----
 llvm/test/Bitcode/compatibility-3.9.ll        | 16 +++----
 llvm/test/Bitcode/compatibility-4.0.ll        | 16 +++----
 llvm/test/Bitcode/compatibility-5.0.ll        | 16 +++----
 llvm/test/Bitcode/compatibility-6.0.ll        | 16 +++----
 llvm/test/Bitcode/compatibility.ll            | 18 +++----
 llvm/test/Bitcode/thinlto-function-summary.ll |  6 +--
 .../Bitcode/variableArgumentIntrinsic.3.2.ll  |  8 ++--
 .../MemorySanitizer/AArch64/vararg_shadow.ll  | 48 +++++++++----------
 .../MemorySanitizer/SystemZ/vararg-kernel.ll  |  2 +-
 .../MemorySanitizer/X86/vararg_shadow.ll      | 48 +++++++++----------
 .../MemorySanitizer/msan_debug_info.ll        |  2 +-
 .../Transforms/GlobalOpt/inalloca-varargs.ll  |  2 +-
 .../Transforms/IROutliner/illegal-vaarg.ll    | 12 ++---
 .../IROutliner/outline-vaarg-intrinsic.ll     |  8 ++--
 llvm/test/Transforms/NewGVN/pr31483.ll        |  2 +-
 .../Transforms/Reassociate/vaarg_movable.ll   |  4 +-
 41 files changed, 267 insertions(+), 239 deletions(-)
 create mode 100644 clang/test/CodeGen/varargs-with-nonzero-default-address-space.c

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b09bf563622089..75fd036e8654c8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -792,7 +792,8 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
 
 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
-  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
+  return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
+                            ArgValue);
 }
 
 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
@@ -3018,7 +3019,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_va_copy: {
     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
-    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
+    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy,
+                                        {DstPtr->getType(), SrcPtr->getType()}),
+                       {DstPtr, SrcPtr});
     return RValue::get(nullptr);
   }
   case Builtin::BIabs:
diff --git a/clang/test/CodeGen/CSKY/csky-abi.c b/clang/test/CodeGen/CSKY/csky-abi.c
index 2e549376ba9330..29ed661aea75d9 100644
--- a/clang/test/CodeGen/CSKY/csky-abi.c
+++ b/clang/test/CodeGen/CSKY/csky-abi.c
@@ -185,13 +185,13 @@ void f_va_caller(void) {
 // CHECK:   [[VA:%.*]] = alloca ptr, align 4
 // CHECK:   [[V:%.*]] = alloca i32, align 4
 // CHECK:   store ptr %fmt, ptr [[FMT_ADDR]], align 4
-// CHECK:   call void @llvm.va_start(ptr [[VA]])
+// CHECK:   call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK:   [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK:   [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK:   store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK:   [[TMP1:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
 // CHECK:   store i32 [[TMP1]], ptr [[V]], align 4
-// CHECK:   call void @llvm.va_end(ptr [[VA]])
+// CHECK:   call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK:   [[TMP2:%.*]] = load i32, ptr [[V]], align 4
 // CHECK:   ret i32 [[TMP2]]
 // CHECK: }
@@ -210,13 +210,13 @@ int f_va_1(char *fmt, ...) {
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[V:%.*]] = alloca double, align 4
 // CHECK-NEXT:    store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARGP_CUR]], align 4
 // CHECK-NEXT:    store double [[TMP4]], ptr [[V]], align 4
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP5:%.*]] = load double, ptr [[V]], align 4
 // CHECK-NEXT:    ret double [[TMP5]]
 double f_va_2(char *fmt, ...) {
@@ -236,7 +236,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-NEXT:    [[W:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[X:%.*]] = alloca double, align 4
 // CHECK-NEXT:    store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -252,7 +252,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-NEXT:    store ptr [[ARGP_NEXT5]], ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP11:%.*]] = load double, ptr [[ARGP_CUR4]], align 4
 // CHECK-NEXT:    store double [[TMP11]], ptr [[X]], align 4
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP12:%.*]] = load double, ptr [[V]], align 4
 // CHECK-NEXT:    [[TMP13:%.*]] = load double, ptr [[X]], align 4
 // CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
@@ -279,7 +279,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-NEXT:    [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
 // CHECK-NEXT:    [[RET:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -302,7 +302,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-NEXT:    [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR8]], i32 16
 // CHECK-NEXT:    store ptr [[ARGP_NEXT9]], ptr [[VA]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[ARGP_CUR8]], i32 16, i1 false)
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 int f_va_4(char *fmt, ...) {
   __builtin_va_list va;
 
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c
index 66b480a7f06894..fc7f1eada586b3 100644
--- a/clang/test/CodeGen/LoongArch/abi-lp64d.c
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c
@@ -449,13 +449,13 @@ void f_va_caller(void) {
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8
 // CHECK-NEXT:    store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-NEXT:    ret i32 [[TMP1]]
 int f_va_int(char *fmt, ...) {
diff --git a/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c b/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
index 03182423a422c1..b3f1e93b639440 100644
--- a/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
+++ b/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
@@ -17,7 +17,7 @@ vector double vector_varargs(int count, ...) {
 }
 
 // CHECK:         %arg_list = alloca ptr
-// CHECK:         call void @llvm.va_start(ptr %arg_list)
+// CHECK:         call void @llvm.va_start.p0(ptr %arg_list)
 
 // AIX32:       for.body:
 // AIX32-NEXT:    %argp.cur = load ptr, ptr %arg_list, align 4
@@ -41,4 +41,4 @@ vector double vector_varargs(int count, ...) {
 
 
 // CHECK:      for.end:
-// CHECK:        call void @llvm.va_end(ptr %arg_list)
+// CHECK:        call void @llvm.va_end.p0(ptr %arg_list)
diff --git a/clang/test/CodeGen/PowerPC/aix-vaargs.c b/clang/test/CodeGen/PowerPC/aix-vaargs.c
index 8b8417d315a504..8637f9cafe6470 100644
--- a/clang/test/CodeGen/PowerPC/aix-vaargs.c
+++ b/clang/test/CodeGen/PowerPC/aix-vaargs.c
@@ -35,7 +35,7 @@ void testva (int n, ...) {
 
 // CHECK-NEXT:  %v = alloca i32, align 4
 // CHECK-NEXT:  store i32 %n, ptr %n.addr, align 4
-// CHECK-NEXT:  call void @llvm.va_start(ptr %ap)
+// CHECK-NEXT:  call void @llvm.va_start.p0(ptr %ap)
 
 // AIX32-NEXT:  %argp.cur = load ptr, ptr %ap, align 4
 // AIX32-NEXT:  %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 16
@@ -48,7 +48,7 @@ void testva (int n, ...) {
 // AIX32-NEXT:  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %t, ptr align 4 %argp.cur, i32 16, i1 false)
 // AIX64-NEXT:  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %t, ptr align 8 %argp.cur, i64 16, i1 false)
 
-// CHECK-NEXT:  call void @llvm.va_copy(ptr %ap2, ptr %ap)
+// CHECK-NEXT:  call void @llvm.va_copy.p0.p0(ptr %ap2, ptr %ap)
 
 // AIX32-NEXT:  %argp.cur1 = load ptr, ptr %ap2, align 4
 // AIX32-NEXT:  %argp.next2 = getelementptr inbounds i8, ptr %argp.cur1, i32 4
@@ -62,14 +62,14 @@ void testva (int n, ...) {
 // AIX64-NEXT:  %1 = load i32, ptr %0, align 4
 // AIX64-NEXT:  store i32 %1, ptr %v, align 4
 
-// CHECK-NEXT:  call void @llvm.va_end(ptr %ap2)
-// CHECK-NEXT:  call void @llvm.va_end(ptr %ap)
+// CHECK-NEXT:  call void @llvm.va_end.p0(ptr %ap2)
+// CHECK-NEXT:  call void @llvm.va_end.p0(ptr %ap)
 // CHECK-NEXT:  ret void
 
-// CHECK: declare void @llvm.va_start(ptr)
+// CHECK: declare void @llvm.va_start.p0(ptr)
 
 // AIX32: declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
 // AIX64: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
 
-// CHECK: declare void @llvm.va_copy(ptr, ptr)
-// CHECK: declare void @llvm.va_end(ptr)
+// CHECK: declare void @llvm.va_copy.p0.p0(ptr, ptr)
+// CHECK: declare void @llvm.va_end.p0(ptr)
diff --git a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
index 396614fe5bac2f..2f5459d1bb9c4c 100644
--- a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
+++ b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
@@ -31,7 +31,7 @@ void foo_ls(ldbl128_s);
 // OMP-TARGET: call void @foo_ld(ppc_fp128 noundef %[[V3]])
 
 // OMP-HOST-LABEL: define{{.*}} void @omp(
-// OMP-HOST: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// OMP-HOST: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
 // OMP-HOST: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]], align 8
 // OMP-HOST: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
 // OMP-HOST: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
@@ -49,13 +49,13 @@ void omp(int n, ...) {
 }
 
 // IEEE-LABEL: define{{.*}} void @f128
-// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
 // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
 // IEEE: call void @foo_fq(fp128 noundef %[[V4]])
-// IEEE: call void @llvm.va_end(ptr %[[AP]])
+// IEEE: call void @llvm.va_end.p0(ptr %[[AP]])
 void f128(int n, ...) {
   va_list ap;
   va_start(ap, n);
@@ -64,20 +64,20 @@ void f128(int n, ...) {
 }
 
 // IEEE-LABEL: define{{.*}} void @long_double
-// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
 // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
 // IEEE: call void @foo_ld(fp128 noundef %[[V4]])
-// IEEE: call void @llvm.va_end(ptr %[[AP]])
+// IEEE: call void @llvm.va_end.p0(ptr %[[AP]])
 
 // IBM-LABEL: define{{.*}} void @long_double
-// IBM: call void @llvm.va_start(ptr  %[[AP:[0-9a-zA-Z_.]+]])
+// IBM: call void @llvm.va_start.p0(ptr  %[[AP:[0-9a-zA-Z_.]+]])
 // IBM: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
 // IBM: %[[V4:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8
 // IBM: call void @foo_ld(ppc_fp128 noundef %[[V4]])
-// IBM: call void @llvm.va_end(ptr %[[AP]])
+// IBM: call void @llvm.va_end.p0(ptr %[[AP]])
 void long_double(int n, ...) {
   va_list ap;
   va_start(ap, n);
@@ -86,7 +86,7 @@ void long_double(int n, ...) {
 }
 
 // IEEE-LABEL: define{{.*}} void @long_double_struct
-// IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
+// IEEE: call void @llvm.va_start.p0(ptr %[[AP:[0-9a-zA-Z_.]+]])
 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
 // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
@@ -96,7 +96,7 @@ void long_double(int n, ...) {
 // IEEE: %[[COERCE:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.ldbl128_s, ptr %[[TMP]], i32 0, i32 0
 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[COERCE]], align 16
 // IEEE: call void @foo_ls(fp128 inreg %[[V4]])
-// IEEE: call void @llvm.va_end(ptr %[[AP]])
+// IEEE: call void @llvm.va_end.p0(ptr %[[AP]])
 void long_double_struct(int n, ...) {
   va_list ap;
   va_start(ap, n);
diff --git a/clang/test/CodeGen/RISCV/riscv32-vararg.c b/clang/test/CodeGen/RISCV/riscv32-vararg.c
index 1c4e41f2f54c8f..00e04eb894675e 100644
--- a/clang/test/CodeGen/RISCV/riscv32-vararg.c
+++ b/clang/test/CodeGen/RISCV/riscv32-vararg.c
@@ -80,13 +80,13 @@ void f_va_caller(void) {
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
 // CHECK-NEXT:    store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-NEXT:    ret i32 [[TMP1]]
 //
@@ -111,7 +111,7 @@ int f_va_1(char *fmt, ...) {
 // CHECK-ILP32F-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-ILP32F-NEXT:    [[V:%.*]] = alloca double, align 8
 // CHECK-ILP32F-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32F-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32F-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32F-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
 // CHECK-ILP32F-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -119,7 +119,7 @@ int f_va_1(char *fmt, ...) {
 // CHECK-ILP32F-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-ILP32F-NEXT:    [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
 // CHECK-ILP32F-NEXT:    store double [[TMP1]], ptr [[V]], align 8
-// CHECK-ILP32F-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32F-NEXT:    [[TMP2:%.*]] = load double, ptr [[V]], align 8
 // CHECK-ILP32F-NEXT:    ret double [[TMP2]]
 //
@@ -130,7 +130,7 @@ int f_va_1(char *fmt, ...) {
 // CHECK-ILP32D-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-ILP32D-NEXT:    [[V:%.*]] = alloca double, align 8
 // CHECK-ILP32D-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32D-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32D-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32D-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
 // CHECK-ILP32D-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -138,7 +138,7 @@ int f_va_1(char *fmt, ...) {
 // CHECK-ILP32D-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-ILP32D-NEXT:    [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
 // CHECK-ILP32D-NEXT:    store double [[TMP1]], ptr [[V]], align 8
-// CHECK-ILP32D-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32D-NEXT:    [[TMP2:%.*]] = load double, ptr [[V]], align 8
 // CHECK-ILP32D-NEXT:    ret double [[TMP2]]
 //
@@ -149,13 +149,13 @@ int f_va_1(char *fmt, ...) {
 // CHECK-ILP32E-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-ILP32E-NEXT:    [[V:%.*]] = alloca double, align 8
 // CHECK-ILP32E-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32E-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32E-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32E-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
 // CHECK-ILP32E-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-ILP32E-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARGP_CUR]], align 4
 // CHECK-ILP32E-NEXT:    store double [[TMP0]], ptr [[V]], align 8
-// CHECK-ILP32E-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32E-NEXT:    [[TMP1:%.*]] = load double, ptr [[V]], align 8
 // CHECK-ILP32E-NEXT:    ret double [[TMP1]]
 //
@@ -180,7 +180,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-ILP32F-NEXT:    [[W:%.*]] = alloca i32, align 4
 // CHECK-ILP32F-NEXT:    [[X:%.*]] = alloca double, align 8
 // CHECK-ILP32F-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32F-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32F-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32F-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
 // CHECK-ILP32F-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -200,7 +200,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-ILP32F-NEXT:    store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
 // CHECK-ILP32F-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8
 // CHECK-ILP32F-NEXT:    store double [[TMP4]], ptr [[X]], align 8
-// CHECK-ILP32F-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32F-NEXT:    [[TMP5:%.*]] = load double, ptr [[V]], align 8
 // CHECK-ILP32F-NEXT:    [[TMP6:%.*]] = load double, ptr [[X]], align 8
 // CHECK-ILP32F-NEXT:    [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]]
@@ -215,7 +215,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-ILP32D-NEXT:    [[W:%.*]] = alloca i32, align 4
 // CHECK-ILP32D-NEXT:    [[X:%.*]] = alloca double, align 8
 // CHECK-ILP32D-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32D-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32D-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32D-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
 // CHECK-ILP32D-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -235,7 +235,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-ILP32D-NEXT:    store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
 // CHECK-ILP32D-NEXT:    [[TMP4:%.*]] = load double, ptr [[ARGP_CUR3_ALIGNED]], align 8
 // CHECK-ILP32D-NEXT:    store double [[TMP4]], ptr [[X]], align 8
-// CHECK-ILP32D-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32D-NEXT:    [[TMP5:%.*]] = load double, ptr [[V]], align 8
 // CHECK-ILP32D-NEXT:    [[TMP6:%.*]] = load double, ptr [[X]], align 8
 // CHECK-ILP32D-NEXT:    [[ADD:%.*]] = fadd double [[TMP5]], [[TMP6]]
@@ -250,7 +250,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-ILP32E-NEXT:    [[W:%.*]] = alloca i32, align 4
 // CHECK-ILP32E-NEXT:    [[X:%.*]] = alloca double, align 8
 // CHECK-ILP32E-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32E-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32E-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32E-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 8
 // CHECK-ILP32E-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -266,7 +266,7 @@ double f_va_2(char *fmt, ...) {
 // CHECK-ILP32E-NEXT:    store ptr [[ARGP_NEXT4]], ptr [[VA]], align 4
 // CHECK-ILP32E-NEXT:    [[TMP2:%.*]] = load double, ptr [[ARGP_CUR3]], align 4
 // CHECK-ILP32E-NEXT:    store double [[TMP2]], ptr [[X]], align 8
-// CHECK-ILP32E-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32E-NEXT:    [[TMP3:%.*]] = load double, ptr [[V]], align 8
 // CHECK-ILP32E-NEXT:    [[TMP4:%.*]] = load double, ptr [[X]], align 8
 // CHECK-ILP32E-NEXT:    [[ADD:%.*]] = fadd double [[TMP3]], [[TMP4]]
@@ -296,7 +296,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-ILP32F-NEXT:    [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
 // CHECK-ILP32F-NEXT:    [[RET:%.*]] = alloca i32, align 4
 // CHECK-ILP32F-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32F-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32F-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32F-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32F-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK-ILP32F-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -321,7 +321,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-ILP32F-NEXT:    store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
 // CHECK-ILP32F-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
 // CHECK-ILP32F-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
-// CHECK-ILP32F-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32F-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-ILP32F-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
 // CHECK-ILP32F-NEXT:    [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
@@ -384,7 +384,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-ILP32D-NEXT:    [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
 // CHECK-ILP32D-NEXT:    [[RET:%.*]] = alloca i32, align 4
 // CHECK-ILP32D-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32D-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32D-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32D-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32D-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK-ILP32D-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -409,7 +409,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-ILP32D-NEXT:    store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
 // CHECK-ILP32D-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
 // CHECK-ILP32D-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
-// CHECK-ILP32D-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32D-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32D-NEXT:    [[TMP4:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-ILP32D-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
 // CHECK-ILP32D-NEXT:    [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
@@ -472,7 +472,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-ILP32E-NEXT:    [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
 // CHECK-ILP32E-NEXT:    [[RET:%.*]] = alloca i32, align 4
 // CHECK-ILP32E-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-ILP32E-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-ILP32E-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-ILP32E-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-ILP32E-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK-ILP32E-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -497,7 +497,7 @@ double f_va_3(char *fmt, ...) {
 // CHECK-ILP32E-NEXT:    store ptr [[ARGP_NEXT8]], ptr [[VA]], align 4
 // CHECK-ILP32E-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR7]], align 4
 // CHECK-ILP32E-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[LS]], ptr align 4 [[TMP3]], i32 16, i1 false)
-// CHECK-ILP32E-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-ILP32E-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-ILP32E-NEXT:    [[TMP4:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-ILP32E-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to fp128
 // CHECK-ILP32E-NEXT:    [[TMP5:%.*]] = load fp128, ptr [[LD]], align 16
diff --git a/clang/test/CodeGen/RISCV/riscv64-vararg.c b/clang/test/CodeGen/RISCV/riscv64-vararg.c
index 634cde61320cb6..efdffa2687e624 100644
--- a/clang/test/CodeGen/RISCV/riscv64-vararg.c
+++ b/clang/test/CodeGen/RISCV/riscv64-vararg.c
@@ -135,13 +135,13 @@ void f_va_caller(void) {
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8
 // CHECK-NEXT:    store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-NEXT:    ret i32 [[TMP1]]
 //
@@ -166,7 +166,7 @@ int f_va_1(char *fmt, ...) {
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[V:%.*]] = alloca fp128, align 16
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
 // CHECK-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[TMP0]], i64 -16)
@@ -174,7 +174,7 @@ int f_va_1(char *fmt, ...) {
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load fp128, ptr [[ARGP_CUR_ALIGNED]], align 16
 // CHECK-NEXT:    store fp128 [[TMP1]], ptr [[V]], align 16
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP2:%.*]] = load fp128, ptr [[V]], align 16
 // CHECK-NEXT:    ret fp128 [[TMP2]]
 //
@@ -199,7 +199,7 @@ long double f_va_2(char *fmt, ...) {
 // CHECK-NEXT:    [[W:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[X:%.*]] = alloca fp128, align 16
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
 // CHECK-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[TMP0]], i64 -16)
@@ -219,7 +219,7 @@ long double f_va_2(char *fmt, ...) {
 // CHECK-NEXT:    store ptr [[ARGP_NEXT4]], ptr [[VA]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load fp128, ptr [[ARGP_CUR3_ALIGNED]], align 16
 // CHECK-NEXT:    store fp128 [[TMP4]], ptr [[X]], align 16
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP5:%.*]] = load fp128, ptr [[V]], align 16
 // CHECK-NEXT:    [[TMP6:%.*]] = load fp128, ptr [[X]], align 16
 // CHECK-NEXT:    [[ADD:%.*]] = fadd fp128 [[TMP5]], [[TMP6]]
@@ -248,7 +248,7 @@ long double f_va_3(char *fmt, ...) {
 // CHECK-NEXT:    [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 8
 // CHECK-NEXT:    [[RET:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 8
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 8
@@ -267,7 +267,7 @@ long double f_va_3(char *fmt, ...) {
 // CHECK-NEXT:    store ptr [[ARGP_NEXT6]], ptr [[VA]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR5]], align 8
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[LS]], ptr align 8 [[TMP1]], i64 32, i1 false)
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_TINY]], ptr [[TS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[A]], align 2
 // CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP2]] to i64
diff --git a/clang/test/CodeGen/WebAssembly/wasm-varargs.c b/clang/test/CodeGen/WebAssembly/wasm-varargs.c
index c475de19ae4487..e794857304e1c9 100644
--- a/clang/test/CodeGen/WebAssembly/wasm-varargs.c
+++ b/clang/test/CodeGen/WebAssembly/wasm-varargs.c
@@ -10,13 +10,13 @@
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
 // CHECK-NEXT:    store i32 [[TMP0]], ptr [[V]], align 4
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-NEXT:    ret i32 [[TMP1]]
 //
@@ -38,7 +38,7 @@ int test_i32(char *fmt, ...) {
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[V:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
 // CHECK-NEXT:    [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
@@ -46,7 +46,7 @@ int test_i32(char *fmt, ...) {
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARGP_CUR_ALIGNED]], align 8
 // CHECK-NEXT:    store i64 [[TMP1]], ptr [[V]], align 8
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[V]], align 8
 // CHECK-NEXT:    ret i64 [[TMP2]]
 //
@@ -73,13 +73,13 @@ struct S {
 // CHECK-NEXT:    [[FMT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT]], ptr align 4 [[TMP0]], i32 12, i1 false)
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    ret void
 //
 struct S test_struct(char *fmt, ...) {
@@ -102,7 +102,7 @@ struct Z {};
 // CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[U:%.*]] = alloca [[STRUCT_Z:%.*]], align 1
 // CHECK-NEXT:    store ptr [[FMT]], ptr [[FMT_ADDR]], align 4
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
 // CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 4
 // CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0
 // CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[VA]], align 4
@@ -112,7 +112,7 @@ struct Z {};
 // CHECK-NEXT:    store ptr [[ARGP_NEXT2]], ptr [[VA]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR1]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT]], ptr align 4 [[TMP0]], i32 12, i1 false)
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[VA]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VA]])
 // CHECK-NEXT:    ret void
 //
 struct S test_empty_struct(char *fmt, ...) {
diff --git a/clang/test/CodeGen/X86/va-arg-sse.c b/clang/test/CodeGen/X86/va-arg-sse.c
index e040b0e5790bd1..b7d00dad1453d3 100644
--- a/clang/test/CodeGen/X86/va-arg-sse.c
+++ b/clang/test/CodeGen/X86/va-arg-sse.c
@@ -21,7 +21,7 @@ struct S a[5];
 // CHECK-NEXT:    store i32 0, ptr [[J]], align 4
 // CHECK-NEXT:    store i32 0, ptr [[K]], align 4
 // CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
 // CHECK-NEXT:    store ptr getelementptr inbounds ([5 x %struct.S], ptr @a, i64 0, i64 2), ptr [[P]], align 8
 // CHECK-NEXT:    [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
 // CHECK-NEXT:    [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY2]], i32 0, i32 1
@@ -52,7 +52,7 @@ struct S a[5];
 // CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARG]], ptr align 4 [[VAARG_ADDR]], i64 12, i1 false)
 // CHECK-NEXT:    [[ARRAYDECAY3:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0
-// CHECK-NEXT:    call void @llvm.va_end(ptr [[ARRAYDECAY3]])
+// CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[ARRAYDECAY3]])
 // CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[P]], align 8
 // CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne ptr [[TMP15]], null
 // CHECK-NEXT:    br i1 [[TOBOOL]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]]
diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c
index 2b029f64589567..13c68fe54b849f 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c
@@ -73,7 +73,7 @@ __attribute__((noinline)) void named_arg_non_packed_struct(double d0, double d1,
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6:[0-9]+]]
-// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[VL]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void variadic_non_packed_struct(double d0, double d1, double d2, double d3,
@@ -128,7 +128,7 @@ __attribute__((noinline)) void named_arg_packed_struct(double d0, double d1, dou
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[VL]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void variadic_packed_struct(double d0, double d1, double d2, double d3,
@@ -183,7 +183,7 @@ __attribute__((noinline)) void named_arg_packed_member(double d0, double d1, dou
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[VL]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void variadic_packed_member(double d0, double d1, double d2, double d3,
@@ -238,7 +238,7 @@ __attribute__((noinline)) void named_arg_aligned_struct_8(double d0, double d1,
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[VL]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void variadic_aligned_struct_8(double d0, double d1, double d2, double d3,
@@ -293,7 +293,7 @@ __attribute__((noinline)) void named_arg_aligned_member_8(double d0, double d1,
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[VL]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void variadic_aligned_member_8(double d0, double d1, double d2, double d3,
@@ -348,7 +348,7 @@ __attribute__((noinline)) void named_arg_pragma_packed_struct_8(double d0, doubl
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[VL]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void variadic_pragma_packed_struct_8(double d0, double d1, double d2, double d3,
@@ -403,7 +403,7 @@ __attribute__((noinline)) void named_arg_pragma_packed_struct_4(double d0, doubl
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VL:%.*]] = alloca [[STRUCT___VA_LIST:%.*]], align 8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
-// CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[VL]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[VL]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VL]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void variadic_pragma_packed_struct_4(double d0, double d1, double d2, double d3,
diff --git a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c
index 44b87029e7b3d3..ee4e88eda4ef44 100644
--- a/clang/test/CodeGen/aarch64-varargs.c
+++ b/clang/test/CodeGen/aarch64-varargs.c
@@ -837,7 +837,7 @@ void check_start(int n, ...) {
   va_list the_list;
   va_start(the_list, n);
 // CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %struct.__va_list
-// CHECK: call void @llvm.va_start(ptr [[THE_LIST]])
+// CHECK: call void @llvm.va_start.p0(ptr [[THE_LIST]])
 }
 
 typedef struct {} empty;
diff --git a/clang/test/CodeGen/arm-varargs.c b/clang/test/CodeGen/arm-varargs.c
index f754c7f52e5904..ab4ac46924e605 100644
--- a/clang/test/CodeGen/arm-varargs.c
+++ b/clang/test/CodeGen/arm-varargs.c
@@ -264,5 +264,5 @@ void check_start(int n, ...) {
   va_list the_list;
   va_start(the_list, n);
 // CHECK: [[THE_LIST:%[a-z0-9._]+]] = alloca %struct.__va_list
-// CHECK: call void @llvm.va_start(ptr [[THE_LIST]])
+// CHECK: call void @llvm.va_start.p0(ptr [[THE_LIST]])
 }
diff --git a/clang/test/CodeGen/hexagon-linux-vararg.c b/clang/test/CodeGen/hexagon-linux-vararg.c
index 033e72ab449d31..84945e872d28bc 100644
--- a/clang/test/CodeGen/hexagon-linux-vararg.c
+++ b/clang/test/CodeGen/hexagon-linux-vararg.c
@@ -9,7 +9,7 @@ struct AAA {
   int d;
 };
 
-// CHECK:   call void @llvm.va_start(ptr %arraydecay)
+// CHECK:   call void @llvm.va_start.p0(ptr %arraydecay)
 // CHECK:   %arraydecay1 = getelementptr inbounds [1 x %struct.__va_list_tag],
 // ptr %ap, i32 0, i32 0
 // CHECK:   br label %vaarg.maybe_reg
diff --git a/clang/test/CodeGen/mips-varargs.c b/clang/test/CodeGen/mips-varargs.c
index 052aedd1cd1e2c..029f000c121a5b 100644
--- a/clang/test/CodeGen/mips-varargs.c
+++ b/clang/test/CodeGen/mips-varargs.c
@@ -29,7 +29,7 @@ int test_i32(char *fmt, ...) {
 // ALL:   [[V:%.*]] = alloca i32, align 4
 // NEW:   [[PROMOTION_TEMP:%.*]] = alloca i32, align 4
 //
-// ALL:   call void @llvm.va_start(ptr %va)
+// ALL:   call void @llvm.va_start.p0(ptr %va)
 // ALL:   [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
 // O32:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T:i32]] [[$CHUNKSIZE:4]]
 // NEW:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T:i32|i64]] [[$CHUNKSIZE:8]]
@@ -45,7 +45,7 @@ int test_i32(char *fmt, ...) {
 // NEW:   [[ARG:%.+]] = load i32, ptr [[PROMOTION_TEMP]], align 4
 // ALL:   store i32 [[ARG]], ptr [[V]], align 4
 //
-// ALL:   call void @llvm.va_end(ptr %va)
+// ALL:   call void @llvm.va_end.p0(ptr %va)
 // ALL: }
 
 long long test_i64(char *fmt, ...) {
@@ -61,7 +61,7 @@ long long test_i64(char *fmt, ...) {
 // ALL-LABEL: define{{.*}} i64 @test_i64(ptr{{.*}} %fmt, ...)
 //
 // ALL:   %va = alloca ptr, align [[$PTRALIGN]]
-// ALL:   call void @llvm.va_start(ptr %va)
+// ALL:   call void @llvm.va_start.p0(ptr %va)
 // ALL:   [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
 //
 // i64 is 8-byte aligned, while this is within O32's stack alignment there's no
@@ -74,7 +74,7 @@ long long test_i64(char *fmt, ...) {
 //
 // ALL:   [[ARG:%.+]] = load i64, ptr [[AP_CUR]], align 8
 //
-// ALL:   call void @llvm.va_end(ptr %va)
+// ALL:   call void @llvm.va_end.p0(ptr %va)
 // ALL: }
 
 char *test_ptr(char *fmt, ...) {
@@ -92,7 +92,7 @@ char *test_ptr(char *fmt, ...) {
 // ALL:   %va = alloca ptr, align [[$PTRALIGN]]
 // ALL:   [[V:%.*]] = alloca ptr, align [[$PTRALIGN]]
 // N32:   [[AP_CAST:%.+]] = alloca ptr, align 4
-// ALL:   call void @llvm.va_start(ptr %va)
+// ALL:   call void @llvm.va_start.p0(ptr %va)
 // ALL:   [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
 // ALL:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, ptr [[AP_CUR]], [[$INTPTR_T]] [[$CHUNKSIZE]]
 // ALL:   store ptr [[AP_NEXT]], ptr %va, align [[$PTRALIGN]]
@@ -109,7 +109,7 @@ char *test_ptr(char *fmt, ...) {
 // N64:   [[ARG:%.+]] = load ptr, ptr [[AP_CUR]], align [[$PTRALIGN]]
 // ALL:   store ptr [[ARG]], ptr [[V]], align [[$PTRALIGN]]
 //
-// ALL:   call void @llvm.va_end(ptr %va)
+// ALL:   call void @llvm.va_end.p0(ptr %va)
 // ALL: }
 
 int test_v4i32(char *fmt, ...) {
@@ -128,7 +128,7 @@ int test_v4i32(char *fmt, ...) {
 //
 // ALL:   %va = alloca ptr, align [[$PTRALIGN]]
 // ALL:   [[V:%.+]] = alloca <4 x i32>, align 16
-// ALL:   call void @llvm.va_start(ptr %va)
+// ALL:   call void @llvm.va_start.p0(ptr %va)
 // ALL:   [[AP_CUR:%.+]] = load ptr, ptr %va, align [[$PTRALIGN]]
 //
 // Vectors are 16-byte aligned, however the O32 ABI has a maximum alignment of
@@ -152,7 +152,7 @@ int test_v4i32(char *fmt, ...) {
 // N32:   [[ARG:%.+]] = load <4 x i32>, ptr [[AP_CUR]], align 16
 // ALL:   store <4 x i32> [[ARG]], ptr [[V]], align 16
 //
-// ALL:   call void @llvm.va_end(ptr %va)
+// ALL:   call void @llvm.va_end.p0(ptr %va)
 // ALL:   [[VECEXT:%.+]] = extractelement <4 x i32> {{.*}}, i32 0
 // ALL:   ret i32 [[VECEXT]]
 // ALL: }
diff --git a/clang/test/CodeGen/pr53127.cpp b/clang/test/CodeGen/pr53127.cpp
index 97fe1291352d3c..501222f4582d23 100644
--- a/clang/test/CodeGen/pr53127.cpp
+++ b/clang/test/CodeGen/pr53127.cpp
@@ -34,7 +34,7 @@ void operator delete(void*);
 // CHECK-NEXT:    br i1 [[CALL6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
 // CHECK:       cond.true7:
 // CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L]], i64 0, i64 0
-// CHECK-NEXT:    call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
 // CHECK-NEXT:    br label [[COND_END9:%.*]]
 // CHECK:       cond.false8:
 // CHECK-NEXT:    br label [[COND_END9]]
@@ -44,7 +44,7 @@ void operator delete(void*);
 // CHECK:       cond.true11:
 // CHECK-NEXT:    [[ARRAYDECAY12:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L]], i64 0, i64 0
 // CHECK-NEXT:    [[ARRAYDECAY13:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[L2]], i64 0, i64 0
-// CHECK-NEXT:    call void @llvm.va_copy(ptr [[ARRAYDECAY12]], ptr [[ARRAYDECAY13]])
+// CHECK-NEXT:    call void @llvm.va_copy.p0.p0(ptr [[ARRAYDECAY12]], ptr [[ARRAYDECAY13]])
 // CHECK-NEXT:    br label [[COND_END15:%.*]]
 // CHECK:       cond.false14:
 // CHECK-NEXT:    br label [[COND_END15]]
diff --git a/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c b/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c
new file mode 100644
index 00000000000000..fc9c711be27006
--- /dev/null
+++ b/clang/test/CodeGen/varargs-with-nonzero-default-address-space.c
@@ -0,0 +1,22 @@
+// REQUIRES: spirv-registered-target
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
+struct x {
+  double b;
+  long a;
+};
+
+void testva(int n, ...) {
+  __builtin_va_list ap;
+  __builtin_va_start(ap, n);
+  struct x t = __builtin_va_arg(ap, struct x);
+  __builtin_va_list ap2;
+  __builtin_va_copy(ap2, ap);
+  int v = __builtin_va_arg(ap2, int);
+  __builtin_va_end(ap2);
+  __builtin_va_end(ap);
+}
+
+// CHECK:  call void @llvm.va_start.p4(ptr addrspace(4) %ap{{.*}})
+// CHECK:  call void @llvm.va_copy.p4.p4(ptr addrspace(4) %ap2{{.*}}, ptr addrspace(4) {{.*}})
+// CHECK:  call void @llvm.va_end.p4(ptr addrspace(4) %ap2{{.*}})
+// CHECK-NEXT:  call void @llvm.va_end.p4(ptr addrspace(4) %ap{{.*}})
\ No newline at end of file
diff --git a/clang/test/CodeGen/xcore-abi.c b/clang/test/CodeGen/xcore-abi.c
index 4dd0f221533b94..bb8d2fec46bdb2 100644
--- a/clang/test/CodeGen/xcore-abi.c
+++ b/clang/test/CodeGen/xcore-abi.c
@@ -28,7 +28,7 @@ void testva (int n, ...) {
   // CHECK: [[AP:%[a-z0-9]+]] = alloca ptr, align 4
   // CHECK: [[V5:%[a-z0-9]+]] = alloca %struct.x, align 4
   // CHECK: [[TMP:%[a-z0-9]+]] = alloca [4 x i32], align 4
-  // CHECK: call void @llvm.va_start(ptr [[AP]])
+  // CHECK: call void @llvm.va_start.p0(ptr [[AP]])
 
   char* v1 = va_arg (ap, char*);
   f(v1);
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index 5a4270aef28542..a1d17c840ee460 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -159,9 +159,9 @@ void TakesVarargs(int i, ...) {
   // WIN: %[[ARGS:.+]] = alloca ptr
   __builtin_va_start(args, i);
   // LIN64: %[[STARTAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]]
-  // LIN64: call void @llvm.va_start(ptr %[[STARTAD]])
-  // LIN32: call void @llvm.va_start(ptr %[[ARGS]])
-  // WIN: call void @llvm.va_start(ptr %[[ARGS]])
+  // LIN64: call void @llvm.va_start.p0(ptr %[[STARTAD]])
+  // LIN32: call void @llvm.va_start.p0(ptr %[[ARGS]])
+  // WIN: call void @llvm.va_start.p0(ptr %[[ARGS]])
 
   _BitInt(92) A = __builtin_va_arg(args, _BitInt(92));
   // LIN64: %[[AD1:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]]
@@ -302,9 +302,9 @@ void TakesVarargs(int i, ...) {
 
   __builtin_va_end(args);
   // LIN64: %[[ENDAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]]
-  // LIN64: call void @llvm.va_end(ptr %[[ENDAD]])
-  // LIN32: call void @llvm.va_end(ptr %[[ARGS]])
-  // WIN: call void @llvm.va_end(ptr %[[ARGS]])
+  // LIN64: call void @llvm.va_end.p0(ptr %[[ENDAD]])
+  // LIN32: call void @llvm.va_end.p0(ptr %[[ARGS]])
+  // WIN: call void @llvm.va_end.p0(ptr %[[ARGS]])
 }
 void typeid_tests() {
   // LIN: define{{.*}} void @_Z12typeid_testsv()
diff --git a/clang/test/CodeGenCXX/ibm128-declarations.cpp b/clang/test/CodeGenCXX/ibm128-declarations.cpp
index 5ee4f354d37957..e0187e20cde423 100644
--- a/clang/test/CodeGenCXX/ibm128-declarations.cpp
+++ b/clang/test/CodeGenCXX/ibm128-declarations.cpp
@@ -107,13 +107,13 @@ int main(void) {
 // CHECK: define dso_local noundef ppc_fp128 @_Z10func_vaargiz(i32 noundef signext %n, ...)
 // CHECK: entry:
 // CHECK:   store i32 %n, ptr %n.addr, align 4
-// CHECK:   call void @llvm.va_start(ptr %ap)
+// CHECK:   call void @llvm.va_start.p0(ptr %ap)
 // CHECK:   %argp.cur = load ptr, ptr %ap, align 8
 // CHECK:   %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 16
 // CHECK:   store ptr %argp.next, ptr %ap, align 8
 // CHECK:   %0 = load ppc_fp128, ptr %argp.cur, align 8
 // CHECK:   store ppc_fp128 %0, ptr %r, align 16
-// CHECK:   call void @llvm.va_end(ptr %ap)
+// CHECK:   call void @llvm.va_end.p0(ptr %ap)
 // CHECK:   %1 = load ppc_fp128, ptr %r, align 16
 // CHECK:   ret ppc_fp128 %1
 // CHECK: }
diff --git a/clang/test/Modules/codegen.test b/clang/test/Modules/codegen.test
index 77602056defd4e..0af630a7548056 100644
--- a/clang/test/Modules/codegen.test
+++ b/clang/test/Modules/codegen.test
@@ -26,7 +26,7 @@ USE: $_Z4instIiEvv = comdat any
 USE: $_Z10always_inlv = comdat any
 FOO: $_ZN13implicit_dtorD2Ev = comdat any
 FOO: define weak_odr void @_Z2f1PKcz(ptr noundef %fmt, ...) #{{[0-9]+}} comdat
-FOO:   call void @llvm.va_start(ptr %{{[a-zA-Z0-9]*}})
+FOO:   call void @llvm.va_start.p0(ptr %{{[a-zA-Z0-9]*}})
 
 Test that implicit special members are emitted into the FOO module if they're
 ODR used there, otherwise emit them linkonce_odr as usual in the use.
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 144298fd7c0162..7fd80225e7e3d6 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -700,10 +700,13 @@ class MSBuiltin<string name> {
 //===--------------- Variable Argument Handling Intrinsics ----------------===//
 //
 
-def int_vastart : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">;
-def int_vacopy  : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [],
-                            "llvm.va_copy">;
-def int_vaend   : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
+def int_vastart : DefaultAttrsIntrinsic<[],
+                                        [llvm_anyptr_ty], [], "llvm.va_start">;
+def int_vacopy  : DefaultAttrsIntrinsic<[],
+                                        [llvm_anyptr_ty, llvm_anyptr_ty], [],
+                                        "llvm.va_copy">;
+def int_vaend   : DefaultAttrsIntrinsic<[],
+                                        [llvm_anyptr_ty], [], "llvm.va_end">;
 
 //===------------------- Garbage Collection Intrinsics --------------------===//
 //
@@ -1713,7 +1716,7 @@ def int_coro_subfn_addr : DefaultAttrsIntrinsic<
 
 ///===-------------------------- Other Intrinsics --------------------------===//
 //
-// TODO: We should introduce a new memory kind fo traps (and other side effects 
+// TODO: We should introduce a new memory kind for traps (and other side effects
 //       we only model to keep things alive).
 def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrInaccessibleMemOnly,
                IntrWriteMem]>, ClangBuiltin<"__builtin_trap">;
diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll
index b1f4abf7b8c554..905be7174f54e0 100644
--- a/llvm/test/Bitcode/compatibility-3.6.ll
+++ b/llvm/test/Bitcode/compatibility-3.6.ll
@@ -1061,16 +1061,16 @@ define void @instructions.va_arg(i8* %v, ...) {
   %ap2 = bitcast i8** %ap to i8*
 
   call void @llvm.va_start(i8* %ap2)
-  ; CHECK: call void @llvm.va_start(ptr %ap2)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
 
   va_arg i8* %ap2, i32
   ; CHECK: va_arg ptr %ap2, i32
 
   call void @llvm.va_copy(i8* %v, i8* %ap2)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap2)
 
   call void @llvm.va_end(i8* %ap2)
-  ; CHECK: call void @llvm.va_end(ptr %ap2)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
 
   ret void
 }
@@ -1178,11 +1178,11 @@ define void @intrinsics.codegen() {
 ; CHECK: attributes #27 = { uwtable }
 ; CHECK: attributes #28 = { "cpu"="cortex-a8" }
 ; CHECK: attributes #29 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #30 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #31 = { nounwind memory(argmem: read) }
-; CHECK: attributes #32 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #34 = { nocallback nounwind }
+; CHECK: attributes #30 = { nounwind memory(argmem: read) }
+; CHECK: attributes #31 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #33 = { nocallback nounwind }
+; CHECK: attributes #34 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #36 = { builtin }
 
diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll
index 91e55f6eda59f9..b2bd1b3be9e687 100644
--- a/llvm/test/Bitcode/compatibility-3.7.ll
+++ b/llvm/test/Bitcode/compatibility-3.7.ll
@@ -1092,16 +1092,16 @@ define void @instructions.va_arg(i8* %v, ...) {
   %ap2 = bitcast i8** %ap to i8*
 
   call void @llvm.va_start(i8* %ap2)
-  ; CHECK: call void @llvm.va_start(ptr %ap2)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
 
   va_arg i8* %ap2, i32
   ; CHECK: va_arg ptr %ap2, i32
 
   call void @llvm.va_copy(i8* %v, i8* %ap2)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap2)
 
   call void @llvm.va_end(i8* %ap2)
-  ; CHECK: call void @llvm.va_end(ptr %ap2)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
 
   ret void
 }
@@ -1241,11 +1241,11 @@ define void @misc.metadata() {
 ; CHECK: attributes #30 = { uwtable }
 ; CHECK: attributes #31 = { "cpu"="cortex-a8" }
 ; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #34 = { nounwind memory(argmem: read) }
-; CHECK: attributes #35 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #37 = { nocallback nounwind }
+; CHECK: attributes #33 = { nounwind memory(argmem: read) }
+; CHECK: attributes #34 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #36 = { nocallback nounwind }
+; CHECK: attributes #37 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #39 = { builtin }
 
diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll
index aa4d8b14968c6e..d9176fbb9c5928 100644
--- a/llvm/test/Bitcode/compatibility-3.8.ll
+++ b/llvm/test/Bitcode/compatibility-3.8.ll
@@ -1247,16 +1247,16 @@ define void @instructions.va_arg(i8* %v, ...) {
   %ap2 = bitcast i8** %ap to i8*
 
   call void @llvm.va_start(i8* %ap2)
-  ; CHECK: call void @llvm.va_start(ptr %ap2)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
 
   va_arg i8* %ap2, i32
   ; CHECK: va_arg ptr %ap2, i32
 
   call void @llvm.va_copy(i8* %v, i8* %ap2)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap2)
 
   call void @llvm.va_end(i8* %ap2)
-  ; CHECK: call void @llvm.va_end(ptr %ap2)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
 
   ret void
 }
@@ -1551,11 +1551,11 @@ normal:
 ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
 ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #42 = { builtin }
 
diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll
index e3c84f6e600714..2c678186741595 100644
--- a/llvm/test/Bitcode/compatibility-3.9.ll
+++ b/llvm/test/Bitcode/compatibility-3.9.ll
@@ -1318,16 +1318,16 @@ define void @instructions.va_arg(i8* %v, ...) {
   %ap2 = bitcast i8** %ap to i8*
 
   call void @llvm.va_start(i8* %ap2)
-  ; CHECK: call void @llvm.va_start(ptr %ap2)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
 
   va_arg i8* %ap2, i32
   ; CHECK: va_arg ptr %ap2, i32
 
   call void @llvm.va_copy(i8* %v, i8* %ap2)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap2)
 
   call void @llvm.va_end(i8* %ap2)
-  ; CHECK: call void @llvm.va_end(ptr %ap2)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
 
   ret void
 }
@@ -1624,11 +1624,11 @@ declare void @f.writeonly() writeonly
 ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
 ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #41 = { memory(write) }
 ; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #43 = { builtin }
diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll
index da5ea0e19639c1..a94b8d8d305381 100644
--- a/llvm/test/Bitcode/compatibility-4.0.ll
+++ b/llvm/test/Bitcode/compatibility-4.0.ll
@@ -1318,16 +1318,16 @@ define void @instructions.va_arg(i8* %v, ...) {
   %ap2 = bitcast i8** %ap to i8*
 
   call void @llvm.va_start(i8* %ap2)
-  ; CHECK: call void @llvm.va_start(ptr %ap2)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
 
   va_arg i8* %ap2, i32
   ; CHECK: va_arg ptr %ap2, i32
 
   call void @llvm.va_copy(i8* %v, i8* %ap2)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap2)
 
   call void @llvm.va_end(i8* %ap2)
-  ; CHECK: call void @llvm.va_end(ptr %ap2)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
 
   ret void
 }
@@ -1649,11 +1649,11 @@ define i8** @constexpr() {
 ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
 ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #41 = { memory(write) }
 ; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #43 = { builtin }
diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll
index 7a39ae6256b805..c411dd88c59b3b 100644
--- a/llvm/test/Bitcode/compatibility-5.0.ll
+++ b/llvm/test/Bitcode/compatibility-5.0.ll
@@ -1330,16 +1330,16 @@ define void @instructions.va_arg(i8* %v, ...) {
   %ap2 = bitcast i8** %ap to i8*
 
   call void @llvm.va_start(i8* %ap2)
-  ; CHECK: call void @llvm.va_start(ptr %ap2)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
 
   va_arg i8* %ap2, i32
   ; CHECK: va_arg ptr %ap2, i32
 
   call void @llvm.va_copy(i8* %v, i8* %ap2)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap2)
 
   call void @llvm.va_end(i8* %ap2)
-  ; CHECK: call void @llvm.va_end(ptr %ap2)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
 
   ret void
 }
@@ -1664,11 +1664,11 @@ define i8** @constexpr() {
 ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
 ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #41 = { memory(write) }
 ; CHECK: attributes #42 = { speculatable }
 ; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll
index 4cb1f3bd123cb1..cf89c061072619 100644
--- a/llvm/test/Bitcode/compatibility-6.0.ll
+++ b/llvm/test/Bitcode/compatibility-6.0.ll
@@ -1340,16 +1340,16 @@ define void @instructions.va_arg(i8* %v, ...) {
   %ap2 = bitcast i8** %ap to i8*
 
   call void @llvm.va_start(i8* %ap2)
-  ; CHECK: call void @llvm.va_start(ptr %ap2)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap2)
 
   va_arg i8* %ap2, i32
   ; CHECK: va_arg ptr %ap2, i32
 
   call void @llvm.va_copy(i8* %v, i8* %ap2)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap2)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap2)
 
   call void @llvm.va_end(i8* %ap2)
-  ; CHECK: call void @llvm.va_end(ptr %ap2)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap2)
 
   ret void
 }
@@ -1674,11 +1674,11 @@ define i8** @constexpr() {
 ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
 ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #41 = { memory(write) }
 ; CHECK: attributes #42 = { speculatable }
 ; CHECK: attributes #43 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index ce6a6571ec144c..4030c3280c77e1 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1648,16 +1648,16 @@ define void @instructions.va_arg(ptr %v, ...) {
   %ap = alloca ptr
 
   call void @llvm.va_start(ptr %ap)
-  ; CHECK: call void @llvm.va_start(ptr %ap)
+  ; CHECK: call void @llvm.va_start.p0(ptr %ap)
 
   va_arg ptr %ap, i32
   ; CHECK: va_arg ptr %ap, i32
 
   call void @llvm.va_copy(ptr %v, ptr %ap)
-  ; CHECK: call void @llvm.va_copy(ptr %v, ptr %ap)
+  ; CHECK: call void @llvm.va_copy.p0.p0(ptr %v, ptr %ap)
 
   call void @llvm.va_end(ptr %ap)
-  ; CHECK: call void @llvm.va_end(ptr %ap)
+  ; CHECK: call void @llvm.va_end.p0(ptr %ap)
 
   ret void
 }
@@ -2091,12 +2091,12 @@ define float @nofpclass_callsites(float %arg) {
 ; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) }
 ; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
 ; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn }
-; CHECK: attributes #37 = { nounwind memory(argmem: read) }
-; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) }
-; CHECK: attributes #39 = { nocallback nofree nosync nounwind willreturn memory(read) }
-; CHECK: attributes #40 = { nocallback nounwind }
-; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
+; CHECK: attributes #36 = { nounwind memory(argmem: read) }
+; CHECK: attributes #37 = { nounwind memory(argmem: readwrite) }
+; CHECK: attributes #38 = { nocallback nofree nosync nounwind willreturn memory(read) }
+; CHECK: attributes #39 = { nocallback nounwind }
+; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) }
+; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn }
 ; CHECK: attributes #42 = { memory(write) }
 ; CHECK: attributes #43 = { speculatable }
 ; CHECK: attributes #44 = { strictfp }
diff --git a/llvm/test/Bitcode/thinlto-function-summary.ll b/llvm/test/Bitcode/thinlto-function-summary.ll
index 799759ebcac1ad..13c6611843d651 100644
--- a/llvm/test/Bitcode/thinlto-function-summary.ll
+++ b/llvm/test/Bitcode/thinlto-function-summary.ll
@@ -13,9 +13,9 @@
 ; "variadic"
 ; BC-NEXT: <FUNCTION op0=46 op1=8
 ; "llvm.va_start"
-; BC-NEXT: <FUNCTION op0=54 op1=13
+; BC-NEXT: <FUNCTION op0=54 op1=16
 ; "f"
-; BC-NEXT: <ALIAS op0=67 op1=1
+; BC-NEXT: <ALIAS op0=70 op1=1
 ; BC: <GLOBALVAL_SUMMARY_BLOCK
 ; BC-NEXT: <VERSION
 ; BC-NEXT: <FLAGS
@@ -26,7 +26,7 @@
 ; BC-NEXT: <ALIAS {{.*}} op0=6 op1=0 op2=3
 ; BC-NEXT: </GLOBALVAL_SUMMARY_BLOCK
 ; BC: <STRTAB_BLOCK
-; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicllvm.va_startf{{.*}}'
+; BC-NEXT: blob data = 'hfoobaranon.{{................................}}.0variadicllvm.va_start.p{{[0-9]+}}f{{.*}}'
 
 
 ; RUN: opt -passes=name-anon-globals -module-summary < %s | llvm-dis | FileCheck %s
diff --git a/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll b/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll
index fad7b8ea6a58b9..5f1becdec57bd3 100644
--- a/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll
+++ b/llvm/test/Bitcode/variableArgumentIntrinsic.3.2.ll
@@ -10,7 +10,7 @@ define i32 @varArgIntrinsic(i32 %X, ...) {
   %ap = alloca i8*
   %ap2 = bitcast i8** %ap to i8*
 
-; CHECK: call void @llvm.va_start(ptr %ap2)
+; CHECK: call void @llvm.va_start.p0(ptr %ap2)
   call void @llvm.va_start(i8* %ap2)
 
 ; CHECK-NEXT: %tmp = va_arg ptr %ap, i32
@@ -19,12 +19,12 @@ define i32 @varArgIntrinsic(i32 %X, ...) {
   %aq = alloca i8*
   %aq2 = bitcast i8** %aq to i8*
 
-; CHECK: call void @llvm.va_copy(ptr %aq2, ptr %ap2)
+; CHECK: call void @llvm.va_copy.p0.p0(ptr %aq2, ptr %ap2)
   call void @llvm.va_copy(i8* %aq2, i8* %ap2)
-; CHECK-NEXT: call void @llvm.va_end(ptr %aq2)
+; CHECK-NEXT: call void @llvm.va_end.p0(ptr %aq2)
   call void @llvm.va_end(i8* %aq2)
 
-; CHECK-NEXT:  call void @llvm.va_end(ptr %ap2)
+; CHECK-NEXT:  call void @llvm.va_end.p0(ptr %ap2)
   call void @llvm.va_end(i8* %ap2)
   ret i32 %tmp
 }
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll
index 96ac4b6088c31c..9133b329deb263 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/vararg_shadow.ll
@@ -758,7 +758,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef %t, i32 noundef
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -808,7 +808,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef %t, i32 noundef
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -851,7 +851,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -901,7 +901,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -936,7 +936,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -986,7 +986,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1021,7 +1021,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1071,7 +1071,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1106,7 +1106,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(fp128 noundef %t, i32 nound
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1156,7 +1156,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(fp128 noundef %t, i32 nound
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1191,7 +1191,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1241,7 +1241,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1276,7 +1276,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz([2 x i64] %t.coe
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1326,7 +1326,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz([2 x i64] %t.coe
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1361,7 +1361,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz([2 x double] a
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1411,7 +1411,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz([2 x double] a
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1446,7 +1446,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz([4 x double] alignst
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1496,7 +1496,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz([4 x double] alignst
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1531,7 +1531,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz([2 x i64] %t.co
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1581,7 +1581,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz([2 x i64] %t.co
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1616,7 +1616,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz([2 x fp128] ali
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1666,7 +1666,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz([2 x fp128] ali
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1701,7 +1701,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz([4 x fp128] ali
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1751,7 +1751,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz([4 x fp128] ali
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP52]], ptr align 16 [[TMP53]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
index 1535fccfc21107..e0b5907719afcb 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/SystemZ/vararg-kernel.ll
@@ -39,7 +39,7 @@ define i64 @foo(i64 %guard, ...) #1 {
 ; Only 56 bytes of the register save area is copied, because of
 ; "use-soft-float".
 
-; CHECK: call void @llvm.va_start(ptr %vl)
+; CHECK: call void @llvm.va_start.p0(ptr %vl)
 ; CHECK: [[VlAddr:%.*]] = ptrtoint ptr %vl to i64
 ; CHECK: [[RegSaveAreaAddrAddr:%.*]] = add i64 [[VlAddr]], 24
 ; CHECK: [[RegSaveAreaAddr:%.*]] = inttoptr i64 [[RegSaveAreaAddrAddr]] to ptr
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll
index aff4d2c55ad6fc..205101564dfe09 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/vararg_shadow.ll
@@ -560,7 +560,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef signext %t, i32
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -580,7 +580,7 @@ define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef signext %t, i32
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -623,7 +623,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -643,7 +643,7 @@ define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -678,7 +678,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -698,7 +698,7 @@ define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 nound
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -733,7 +733,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -753,7 +753,7 @@ define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noun
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -788,7 +788,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(x86_fp80 noundef %t, i32 no
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -808,7 +808,7 @@ define linkonce_odr dso_local void @_Z5test2IeEvT_iz(x86_fp80 noundef %t, i32 no
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -843,7 +843,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -863,7 +863,7 @@ define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 no
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -898,7 +898,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz(i64 %t.coerce0,
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -918,7 +918,7 @@ define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz(i64 %t.coerce0,
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -953,7 +953,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz(double %t.coer
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -973,7 +973,7 @@ define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz(double %t.coer
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1008,7 +1008,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz(ptr noundef byval(%s
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1028,7 +1028,7 @@ define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz(ptr noundef byval(%s
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1063,7 +1063,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz(double %t.coerc
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1083,7 +1083,7 @@ define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz(double %t.coerc
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1118,7 +1118,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz(ptr noundef byv
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1138,7 +1138,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz(ptr noundef byv
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1173,7 +1173,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz(ptr noundef byv
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
 ; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false)
-; CHECK-NEXT:    call void @llvm.va_start(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 16
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
@@ -1193,7 +1193,7 @@ define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz(ptr noundef byv
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @_Z3usePv(ptr noundef nonnull [[ARGS]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll
index 21f3311a57efa6..f07f3ad06e6077 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_debug_info.ll
@@ -542,7 +542,7 @@ define void @VAStart(i32 %x, ...) sanitize_memory {
 ; CHECK-NEXT:    [[TMP29:%.*]] = add i64 [[TMP27]], 17592186044416, !dbg [[DBG11]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr, !dbg [[DBG11]]
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 24, i1 false), !dbg [[DBG11]]
-; CHECK-NEXT:    call void @llvm.va_start(ptr [[VA]]), !dbg [[DBG11]]
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]]), !dbg [[DBG11]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[VA]] to i64, !dbg [[DBG11]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = add i64 [[TMP31]], 16, !dbg [[DBG11]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr, !dbg [[DBG11]]
diff --git a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
index 188210782edd98..4c5a448d12c482 100644
--- a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
+++ b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
@@ -23,7 +23,7 @@ define internal i32 @i(ptr inalloca(ptr) %a, ...) {
 ; CHECK-LABEL: define {{[^@]+}}@i
 ; CHECK-SAME: (ptr inalloca(ptr) [[A:%.*]], ...) unnamed_addr {
 ; CHECK-NEXT:    [[AP:%.*]] = alloca ptr, align 4
-; CHECK-NEXT:    call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[AP]])
 ; CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[AP]], align 4
 ; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
 ; CHECK-NEXT:    ret i32 [[L]]
diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
index ef365d6eaddb5b..09cf3b244d0fbd 100644
--- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
@@ -17,10 +17,10 @@ define i32 @func1(i32 %a, double %b, ptr %v, ...) nounwind {
 ; CHECK-NEXT:    [[AP:%.*]] = alloca ptr, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[A:%.*]], ptr [[A_ADDR]], double [[B:%.*]], ptr [[B_ADDR]])
-; CHECK-NEXT:    call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[AP]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = va_arg ptr [[AP]], i32
-; CHECK-NEXT:    call void @llvm.va_copy(ptr [[V:%.*]], ptr [[AP]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_copy.p0.p0(ptr [[V:%.*]], ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[AP]])
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
 ; CHECK-NEXT:    call void @outlined_ir_func_1(i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
 ; CHECK-NEXT:    [[TMP_RELOAD:%.*]] = load i32, ptr [[TMP_LOC]], align 4
@@ -52,10 +52,10 @@ define i32 @func2(i32 %a, double %b, ptr %v, ...) nounwind {
 ; CHECK-NEXT:    [[AP:%.*]] = alloca ptr, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[A:%.*]], ptr [[A_ADDR]], double [[B:%.*]], ptr [[B_ADDR]])
-; CHECK-NEXT:    call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[AP]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = va_arg ptr [[AP]], i32
-; CHECK-NEXT:    call void @llvm.va_copy(ptr [[V:%.*]], ptr [[AP]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_copy.p0.p0(ptr [[V:%.*]], ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[AP]])
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
 ; CHECK-NEXT:    call void @outlined_ir_func_1(i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
 ; CHECK-NEXT:    [[TMP_RELOAD:%.*]] = load i32, ptr [[TMP_LOC]], align 4
diff --git a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
index 9f565de960575b..d4b4dcfefc7fa9 100644
--- a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
+++ b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
@@ -51,7 +51,7 @@ entry:
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    store double [[B]], ptr [[B_ADDR]], align 8
-; CHECK-NEXT:    call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[AP]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = va_arg ptr [[AP]], i32
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
 ; CHECK-NEXT:    call void @outlined_ir_func_0(ptr [[V]], ptr [[AP]], i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
@@ -70,7 +70,7 @@ entry:
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    store double [[B]], ptr [[B_ADDR]], align 8
-; CHECK-NEXT:    call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[AP]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = va_arg ptr [[AP]], i32
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[TMP_LOC]])
 ; CHECK-NEXT:    call void @outlined_ir_func_0(ptr [[V]], ptr [[AP]], i32 [[TMP0]], ptr [[C]], ptr [[TMP_LOC]])
@@ -84,8 +84,8 @@ entry:
 ; CHECK-NEXT:  newFuncRoot:
 ; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
 ; CHECK:       entry_to_outline:
-; CHECK-NEXT:    call void @llvm.va_copy(ptr [[TMP0]], ptr [[TMP1]])
-; CHECK-NEXT:    call void @llvm.va_end(ptr [[TMP1]])
+; CHECK-NEXT:    call void @llvm.va_copy.p0.p0(ptr [[TMP0]], ptr [[TMP1]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[TMP1]])
 ; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
diff --git a/llvm/test/Transforms/NewGVN/pr31483.ll b/llvm/test/Transforms/NewGVN/pr31483.ll
index 0e7461c2612b9f..82e9a2ab286ee5 100644
--- a/llvm/test/Transforms/NewGVN/pr31483.ll
+++ b/llvm/test/Transforms/NewGVN/pr31483.ll
@@ -41,7 +41,7 @@ define signext i32 @ham(ptr %arg, ptr %arg1) #0 {
 ; CHECK:       bb22:
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb23:
-; CHECK-NEXT:    call void @llvm.va_end(ptr [[TMP]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[TMP]])
 ; CHECK-NEXT:    ret i32 undef
 ;
 bb:
diff --git a/llvm/test/Transforms/Reassociate/vaarg_movable.ll b/llvm/test/Transforms/Reassociate/vaarg_movable.ll
index 337877a54a9071..4e45b219fccd59 100644
--- a/llvm/test/Transforms/Reassociate/vaarg_movable.ll
+++ b/llvm/test/Transforms/Reassociate/vaarg_movable.ll
@@ -10,13 +10,13 @@ define i32 @func(i32 %dummy, ...) {
 ;
 ; CHECK-LABEL: @func(
 ; CHECK-NEXT:    [[VARARGS:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    call void @llvm.va_start(ptr [[VARARGS]])
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VARARGS]])
 ; CHECK-NEXT:    [[V0:%.*]] = va_arg ptr [[VARARGS]], i32
 ; CHECK-NEXT:    [[V1:%.*]] = va_arg ptr [[VARARGS]], i32
 ; CHECK-NEXT:    [[V0_NEG:%.*]] = sub i32 0, [[V0]]
 ; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[V0_NEG]], 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SUB]], [[V1]]
-; CHECK-NEXT:    call void @llvm.va_end(ptr [[VARARGS]])
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VARARGS]])
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %varargs = alloca ptr, align 8



More information about the cfe-commits mailing list