[clang] 3391bdc - Revert "[FunctionAttrs] Propagate some func/arg/ret attributes from caller to callsite (WIP)"

Noah Goldstein via cfe-commits <cfe-commits at lists.llvm.org>
Mon Jun 12 22:53:52 PDT 2023


Author: Noah Goldstein
Date: 2023-06-13T00:53:31-05:00
New Revision: 3391bdc255f1a75c59d71c7305959e84d8d5f468

URL: https://github.com/llvm/llvm-project/commit/3391bdc255f1a75c59d71c7305959e84d8d5f468
DIFF: https://github.com/llvm/llvm-project/commit/3391bdc255f1a75c59d71c7305959e84d8d5f468.diff

LOG: Revert "[FunctionAttrs] Propagate some func/arg/ret attributes from caller to callsite (WIP)"

Accidental commit/push!

This reverts commit 4fa971ff62c3c48c606b792c572c03bd4d5906ee.
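
For context (not part of the commit message): the reverted change taught FunctionAttrs to copy attributes such as noundef from a caller's own parameters onto the arguments at call sites inside that caller, which is why the test updates below drop noundef from call arguments again. A minimal, hypothetical sketch of the direction of that transform, using an intrinsic that appears in the tests below (not taken from the patch itself):

    ; Hypothetical illustration of the reverted caller-to-callsite propagation.
    declare i32 @llvm.loongarch.cpucfg(i32)

    define i32 @caller(i32 noundef %a) {
    entry:
      ; Without the patch the call-site argument carries no extra attribute:
      ;   %r = tail call i32 @llvm.loongarch.cpucfg(i32 %a)
      ; With the reverted patch, %a being noundef in @caller would justify
      ; marking the call-site argument noundef as well:
      %r = tail call i32 @llvm.loongarch.cpucfg(i32 noundef %a)
      ret i32 %r
    }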

Added: 
    

Modified: 
    clang/test/CodeGen/LoongArch/inline-asm-constraints.c
    clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c
    clang/test/CodeGen/LoongArch/intrinsic-la32.c
    clang/test/CodeGen/LoongArch/intrinsic-la64.c
    clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
    clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
    clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c
    clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
    clang/test/CodeGen/PowerPC/ppc64-inline-asm.c
    clang/test/CodeGen/RISCV/riscv-inline-asm.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vwrite-csr.c
    clang/test/CodeGen/X86/fma-builtins-constrained.c
    clang/test/CodeGen/X86/ms-x86-intrinsics.c
    clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c
    clang/test/CodeGen/aarch64-ls64-inline-asm.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilege.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilegt.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw-bfloat.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr-bfloat.c
    clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr.c
    clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
    clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
    clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
    clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
    clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
    clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
    clang/test/CodeGen/msp430-builtins.c
    clang/test/CodeGen/nofpclass.c
    clang/test/Headers/wasm.c
    llvm/lib/Transforms/IPO/FunctionAttrs.cpp
    llvm/lib/Transforms/Utils/CMakeLists.txt
    llvm/test/Other/cgscc-devirt-iteration.ll
    llvm/test/Transforms/FunctionAttrs/nonnull.ll
    llvm/test/Transforms/FunctionAttrs/readattrs.ll
    llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll
    llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
    llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll
    llvm/test/Transforms/PhaseOrdering/memset-tail.ll

Removed: 
    llvm/include/llvm/Transforms/Utils/InferCallsiteAttrs.h
    llvm/lib/Transforms/Utils/InferCallsiteAttrs.cpp


################################################################################
diff --git a/clang/test/CodeGen/LoongArch/inline-asm-constraints.c b/clang/test/CodeGen/LoongArch/inline-asm-constraints.c
index 66dfc6dcb5c68..b19494284bd99 100644
--- a/clang/test/CodeGen/LoongArch/inline-asm-constraints.c
+++ b/clang/test/CodeGen/LoongArch/inline-asm-constraints.c
@@ -59,12 +59,12 @@ void test_K(void) {
 
 void test_ZB(int *p) {
 // CHECK-LABEL: define{{.*}} void @test_ZB(ptr noundef %p)
-// CHECK: call void asm sideeffect "", "*^ZB"(ptr noundef elementtype(i32) %p)
+// CHECK: call void asm sideeffect "", "*^ZB"(ptr elementtype(i32) %p)
   asm volatile ("" :: "ZB"(*p));
 }
 
 void test_ZC(int *p) {
 // CHECK-LABEL: define{{.*}} void @test_ZC(ptr noundef %p)
-// CHECK: call void asm sideeffect "", "*^ZC"(ptr noundef elementtype(i32) %p)
+// CHECK: call void asm sideeffect "", "*^ZC"(ptr elementtype(i32) %p)
   asm volatile ("" :: "ZC"(*p));
 }

diff --git a/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c b/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c
index f2b37a9b261fd..b36fe7a7b69bb 100644
--- a/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c
+++ b/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c
@@ -6,7 +6,7 @@
 
 // CHECK-LABEL: @test_z_zero(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 noundef [[A:%.*]], i32 0) #[[ATTR1:[0-9]+]], !srcloc !2
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 0) #[[ATTR1:[0-9]+]], !srcloc !2
 // CHECK-NEXT:    ret void
 //
 void test_z_zero(int a) {
@@ -16,7 +16,7 @@ void test_z_zero(int a) {
 
 // CHECK-LABEL: @test_z_nonzero(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 noundef [[A:%.*]], i32 1) #[[ATTR1]], !srcloc !3
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 1) #[[ATTR1]], !srcloc !3
 // CHECK-NEXT:    ret void
 //
 void test_z_nonzero(int a) {

diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c
index 286c8828de866..93d54f511a9cd 100644
--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c
+++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c
@@ -62,8 +62,8 @@ unsigned int csrrd_w() {
 
 // LA32-LABEL: @csrwr_w(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 noundef [[A:%.*]], i32 1)
-// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 noundef [[A]], i32 1)
+// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A:%.*]], i32 1)
+// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A]], i32 1)
 // LA32-NEXT:    ret i32 0
 //
 unsigned int csrwr_w(unsigned int a) {
@@ -74,8 +74,8 @@ unsigned int csrwr_w(unsigned int a) {
 
 // LA32-LABEL: @csrxchg_w(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 1)
-// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 noundef [[A]], i32 noundef [[B]], i32 1)
+// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A:%.*]], i32 [[B:%.*]], i32 1)
+// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A]], i32 [[B]], i32 1)
 // LA32-NEXT:    ret i32 0
 //
 unsigned int csrxchg_w(unsigned int a, unsigned int b) {
@@ -86,8 +86,8 @@ unsigned int csrxchg_w(unsigned int a, unsigned int b) {
 
 // LA32-LABEL: @iocsrrd_b(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 noundef [[A:%.*]])
-// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 noundef [[A]])
+// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A:%.*]])
+// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A]])
 // LA32-NEXT:    ret i8 0
 //
 unsigned char iocsrrd_b(unsigned int a) {
@@ -98,8 +98,8 @@ unsigned char iocsrrd_b(unsigned int a) {
 
 // LA32-LABEL: @iocsrrd_h(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 noundef [[A:%.*]])
-// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 noundef [[A]])
+// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]])
+// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]])
 // LA32-NEXT:    ret i16 0
 //
 unsigned short iocsrrd_h(unsigned int a) {
@@ -110,8 +110,8 @@ unsigned short iocsrrd_h(unsigned int a) {
 
 // LA32-LABEL: @iocsrrd_w(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 noundef [[A:%.*]])
-// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 noundef [[A]])
+// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A:%.*]])
+// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A]])
 // LA32-NEXT:    ret i32 0
 //
 unsigned int iocsrrd_w(unsigned int a) {
@@ -123,8 +123,8 @@ unsigned int iocsrrd_w(unsigned int a) {
 // LA32-LABEL: @iocsrwr_b(
 // LA32-NEXT:  entry:
 // LA32-NEXT:    [[CONV_I:%.*]] = zext i8 [[A:%.*]] to i32
-// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 noundef [[B]])
+// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B:%.*]])
+// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B]])
 // LA32-NEXT:    ret void
 //
 void iocsrwr_b(unsigned char a, unsigned int b) {
@@ -135,8 +135,8 @@ void iocsrwr_b(unsigned char a, unsigned int b) {
 // LA32-LABEL: @iocsrwr_h(
 // LA32-NEXT:  entry:
 // LA32-NEXT:    [[CONV_I:%.*]] = zext i16 [[A:%.*]] to i32
-// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 noundef [[B]])
+// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B:%.*]])
+// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B]])
 // LA32-NEXT:    ret void
 //
 void iocsrwr_h(unsigned short a, unsigned int b) {
@@ -146,8 +146,8 @@ void iocsrwr_h(unsigned short a, unsigned int b) {
 
 // LA32-LABEL: @iocsrwr_w(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 noundef [[A:%.*]], i32 noundef [[B:%.*]])
-// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 noundef [[A]], i32 noundef [[B]])
+// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 [[A:%.*]], i32 [[B:%.*]])
+// LA32-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 [[A]], i32 [[B]])
 // LA32-NEXT:    ret void
 //
 void iocsrwr_w(unsigned int a, unsigned int b) {
@@ -157,8 +157,8 @@ void iocsrwr_w(unsigned int a, unsigned int b) {
 
 // LA32-LABEL: @cpucfg(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 noundef [[A:%.*]])
-// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 noundef [[A]])
+// LA32-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A:%.*]])
+// LA32-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A]])
 // LA32-NEXT:    ret i32 0
 //
 unsigned int cpucfg(unsigned int a) {
@@ -192,8 +192,8 @@ int loongarch_movfcsr2gr() {
 
 // LA32-LABEL: @loongarch_movgr2fcsr(
 // LA32-NEXT:  entry:
-// LA32-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 noundef [[A:%.*]])
-// LA32-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 noundef [[A]])
+// LA32-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A:%.*]])
+// LA32-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A]])
 // LA32-NEXT:    ret void
 //
 void loongarch_movgr2fcsr(int a) {
@@ -206,11 +206,6 @@ void loongarch_movgr2fcsr(int a) {
 // CHECK-NEXT:    tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024)
 // CHECK-NEXT:    tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024)
 // CHECK-NEXT:    ret void
-// LA32-LABEL: @cacop_w(
-// LA32-NEXT:  entry:
-// LA32-NEXT:    tail call void @llvm.loongarch.cacop.w(i32 1, i32 noundef [[A:%.*]], i32 1024)
-// LA32-NEXT:    tail call void @llvm.loongarch.cacop.w(i32 1, i32 noundef [[A]], i32 1024)
-// LA32-NEXT:    ret void
 //
 void cacop_w(unsigned long int a) {
   __cacop_w(1, a, 1024);

diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c
index 9dbbf3bd666fd..a740882eef541 100644
--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c
+++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c
@@ -61,8 +61,8 @@ unsigned int csrrd_w() {
 
 // CHECK-LABEL: @csrwr_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 noundef [[A:%.*]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 noundef [[A]], i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A]], i32 1)
 // CHECK-NEXT:    ret i32 0
 //
 unsigned int csrwr_w(unsigned int a) {
@@ -73,8 +73,8 @@ unsigned int csrwr_w(unsigned int a) {
 
 // CHECK-LABEL: @csrxchg_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 noundef [[A]], i32 noundef [[B]], i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A:%.*]], i32 [[B:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A]], i32 [[B]], i32 1)
 // CHECK-NEXT:    ret i32 0
 //
 unsigned int csrxchg_w(unsigned int a, unsigned int b) {
@@ -87,8 +87,8 @@ unsigned int csrxchg_w(unsigned int a, unsigned int b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[A:%.*]], 24
 // CHECK-NEXT:    [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 24
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 [[CONV_I]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crc_w_b_w(int a, int b) {
@@ -101,8 +101,8 @@ int crc_w_b_w(int a, int b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[A:%.*]], 16
 // CHECK-NEXT:    [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 16
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 [[CONV_I]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crc_w_h_w(int a, int b) {
@@ -113,8 +113,8 @@ int crc_w_h_w(int a, int b) {
 
 // CHECK-LABEL: @crc_w_w_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 noundef [[A:%.*]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crc_w_w_w(int a, int b) {
@@ -125,8 +125,8 @@ int crc_w_w_w(int a, int b) {
 
 // CHECK-LABEL: @cacop_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.loongarch.cacop.d(i64 1, i64 noundef [[A:%.*]], i64 1024)
-// CHECK-NEXT:    tail call void @llvm.loongarch.cacop.d(i64 1, i64 noundef [[A]], i64 1024)
+// CHECK-NEXT:    tail call void @llvm.loongarch.cacop.d(i64 1, i64 [[A:%.*]], i64 1024)
+// CHECK-NEXT:    tail call void @llvm.loongarch.cacop.d(i64 1, i64 [[A]], i64 1024)
 // CHECK-NEXT:    ret void
 //
 void cacop_d(unsigned long int a) {
@@ -136,8 +136,8 @@ void cacop_d(unsigned long int a) {
 
 // CHECK-LABEL: @crc_w_d_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 noundef [[A:%.*]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crc_w_d_w(long int a, int b) {
@@ -150,8 +150,8 @@ int crc_w_d_w(long int a, int b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[A:%.*]], 24
 // CHECK-NEXT:    [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 24
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 [[CONV_I]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crcc_w_b_w(int a, int b) {
@@ -164,8 +164,8 @@ int crcc_w_b_w(int a, int b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[A:%.*]], 16
 // CHECK-NEXT:    [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 16
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 [[CONV_I]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crcc_w_h_w(int a, int b) {
@@ -176,8 +176,8 @@ int crcc_w_h_w(int a, int b) {
 
 // CHECK-LABEL: @crcc_w_w_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 noundef [[A:%.*]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crcc_w_w_w(int a, int b) {
@@ -188,8 +188,8 @@ int crcc_w_w_w(int a, int b) {
 
 // CHECK-LABEL: @crcc_w_d_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 noundef [[A:%.*]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret i32 0
 //
 int crcc_w_d_w(long int a, int b) {
@@ -212,8 +212,8 @@ unsigned long int csrrd_d() {
 
 // CHECK-LABEL: @csrwr_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 noundef [[A:%.*]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 noundef [[A]], i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 [[A:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 [[A]], i32 1)
 // CHECK-NEXT:    ret i64 0
 //
 unsigned long int csrwr_d(unsigned long int a) {
@@ -224,8 +224,8 @@ unsigned long int csrwr_d(unsigned long int a) {
 
 // CHECK-LABEL: @csrxchg_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i32 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 noundef [[A]], i64 noundef [[B]], i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 [[A:%.*]], i64 [[B:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 [[A]], i64 [[B]], i32 1)
 // CHECK-NEXT:    ret i64 0
 //
 unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) {
@@ -236,8 +236,8 @@ unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) {
 
 // CHECK-LABEL: @iocsrrd_b(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 noundef [[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 noundef [[A]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A]])
 // CHECK-NEXT:    ret i8 0
 //
 unsigned char iocsrrd_b(unsigned int a) {
@@ -248,8 +248,8 @@ unsigned char iocsrrd_b(unsigned int a) {
 
 // CHECK-LABEL: @iocsrrd_h(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 noundef [[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 noundef [[A]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]])
 // CHECK-NEXT:    ret i16 0
 //
 unsigned short iocsrrd_h(unsigned int a) {
@@ -260,8 +260,8 @@ unsigned short iocsrrd_h(unsigned int a) {
 
 // CHECK-LABEL: @iocsrrd_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 noundef [[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 noundef [[A]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A]])
 // CHECK-NEXT:    ret i32 0
 //
 unsigned int iocsrrd_w(unsigned int a) {
@@ -273,8 +273,8 @@ unsigned int iocsrrd_w(unsigned int a) {
 // CHECK-LABEL: @iocsrwr_b(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[A:%.*]] to i32
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 noundef [[B]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B:%.*]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B]])
 // CHECK-NEXT:    ret void
 //
 void iocsrwr_b(unsigned char a, unsigned int b) {
@@ -285,8 +285,8 @@ void iocsrwr_b(unsigned char a, unsigned int b) {
 // CHECK-LABEL: @iocsrwr_h(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[CONV_I:%.*]] = zext i16 [[A:%.*]] to i32
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 noundef [[B]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B:%.*]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B]])
 // CHECK-NEXT:    ret void
 //
 void iocsrwr_h(unsigned short a, unsigned int b) {
@@ -296,8 +296,8 @@ void iocsrwr_h(unsigned short a, unsigned int b) {
 
 // CHECK-LABEL: @iocsrwr_w(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 noundef [[A:%.*]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.w(i32 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret void
 //
 void iocsrwr_w(unsigned int a, unsigned int b) {
@@ -307,8 +307,8 @@ void iocsrwr_w(unsigned int a, unsigned int b) {
 
 // CHECK-LABEL: @iocsrrd_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 noundef [[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 noundef [[A]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 [[A]])
 // CHECK-NEXT:    ret i64 0
 //
 unsigned long int iocsrrd_d(unsigned int a) {
@@ -319,8 +319,8 @@ unsigned long int iocsrrd_d(unsigned int a) {
 
 // CHECK-LABEL: @iocsrwr_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.d(i64 noundef [[A:%.*]], i32 noundef [[B:%.*]])
-// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.d(i64 noundef [[A]], i32 noundef [[B]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.d(i64 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.iocsrwr.d(i64 [[A]], i32 [[B]])
 // CHECK-NEXT:    ret void
 //
 void iocsrwr_d(unsigned long int a, unsigned int b) {
@@ -330,8 +330,8 @@ void iocsrwr_d(unsigned long int a, unsigned int b) {
 
 // CHECK-LABEL: @asrtle_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.loongarch.asrtle.d(i64 noundef [[A:%.*]], i64 noundef [[B:%.*]])
-// CHECK-NEXT:    tail call void @llvm.loongarch.asrtle.d(i64 noundef [[A]], i64 noundef [[B]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.asrtle.d(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.asrtle.d(i64 [[A]], i64 [[B]])
 // CHECK-NEXT:    ret void
 //
 void asrtle_d(long int a, long int b) {
@@ -341,8 +341,8 @@ void asrtle_d(long int a, long int b) {
 
 // CHECK-LABEL: @asrtgt_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.loongarch.asrtgt.d(i64 noundef [[A:%.*]], i64 noundef [[B:%.*]])
-// CHECK-NEXT:    tail call void @llvm.loongarch.asrtgt.d(i64 noundef [[A]], i64 noundef [[B]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.asrtgt.d(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.asrtgt.d(i64 [[A]], i64 [[B]])
 // CHECK-NEXT:    ret void
 //
 void asrtgt_d(long int a, long int b) {
@@ -352,8 +352,8 @@ void asrtgt_d(long int a, long int b) {
 
 // CHECK-LABEL: @lddir_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 noundef [[A:%.*]], i64 1)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 noundef [[A]], i64 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 [[A:%.*]], i64 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 [[A]], i64 1)
 // CHECK-NEXT:    ret i64 0
 //
 long int lddir_d(long int a) {
@@ -364,8 +364,8 @@ long int lddir_d(long int a) {
 
 // CHECK-LABEL: @ldpte_d(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.loongarch.ldpte.d(i64 noundef [[A:%.*]], i64 1)
-// CHECK-NEXT:    tail call void @llvm.loongarch.ldpte.d(i64 noundef [[A]], i64 1)
+// CHECK-NEXT:    tail call void @llvm.loongarch.ldpte.d(i64 [[A:%.*]], i64 1)
+// CHECK-NEXT:    tail call void @llvm.loongarch.ldpte.d(i64 [[A]], i64 1)
 // CHECK-NEXT:    ret void
 //
 void ldpte_d(long int a) {
@@ -375,8 +375,8 @@ void ldpte_d(long int a) {
 
 // CHECK-LABEL: @cpucfg(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 noundef [[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 noundef [[A]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A]])
 // CHECK-NEXT:    ret i32 0
 //
 unsigned int cpucfg(unsigned int a) {
@@ -419,8 +419,8 @@ int loongarch_movfcsr2gr() {
 
 // CHECK-LABEL: @loongarch_movgr2fcsr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 noundef [[A:%.*]])
-// CHECK-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 noundef [[A]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A:%.*]])
+// CHECK-NEXT:    tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A]])
 // CHECK-NEXT:    ret void
 //
 void loongarch_movgr2fcsr(int a) {

diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
index f1f7bb67c7111..471a31a8c5eac 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
@@ -6,13 +6,13 @@
 
 // CHECK-LE-LABEL: @test1(
 // CHECK-LE-NEXT:  entry:
-// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> noundef [[VC4:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC1:%.*]])
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]])
 // CHECK-LE-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
 // CHECK-LE-NEXT:    ret void
 //
 // CHECK-BE-LABEL: @test1(
 // CHECK-BE-NEXT:  entry:
-// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC3:%.*]], <16 x i8> noundef [[VC4:%.*]])
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC4:%.*]])
 // CHECK-BE-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
 // CHECK-BE-NEXT:    ret void
 //
@@ -27,13 +27,13 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
 
 // CHECK-LE-LABEL: @test2(
 // CHECK-LE-NEXT:  entry:
-// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> noundef [[VC2:%.*]], <16 x i8> noundef [[VC1:%.*]])
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]])
 // CHECK-LE-NEXT:    store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-LE-NEXT:    ret void
 //
 // CHECK-BE-LABEL: @test2(
 // CHECK-BE-NEXT:  entry:
-// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> noundef [[VC1:%.*]], <16 x i8> noundef [[VC2:%.*]])
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]])
 // CHECK-BE-NEXT:    store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-BE-NEXT:    ret void
 //

diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
index 5cee9a8071aff..95e28da9b0bf0 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
@@ -4,9 +4,9 @@
 // RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
 // RUN: -emit-llvm %s -o - | FileCheck %s
 
-// CHECK-LABEL: @test1(
+// CHECK-LABEL: define {{[^@]+}}@test1(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], <16 x i8> noundef [[VC]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
@@ -18,7 +18,7 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   *((__vector_quad *)resp) = res;
 }
 
-// CHECK-LABEL: @test2(
+// CHECK-LABEL: define {{[^@]+}}@test2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP0]])
@@ -39,9 +39,9 @@ void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
 }
 
-// CHECK-LABEL: @test3(
+// CHECK-LABEL: define {{[^@]+}}@test3(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
@@ -53,7 +53,7 @@ void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: @test4(
+// CHECK-LABEL: define {{[^@]+}}@test4(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]])
@@ -68,7 +68,7 @@ void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp);
 }
 
-// CHECK-LABEL: @test5(
+// CHECK-LABEL: define {{[^@]+}}@test5(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP0]])
@@ -82,7 +82,7 @@ void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test6(
+// CHECK-LABEL: define {{[^@]+}}@test6(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP0]])
@@ -96,7 +96,7 @@ void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test7(
+// CHECK-LABEL: define {{[^@]+}}@test7(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
@@ -109,9 +109,9 @@ void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test8(
+// CHECK-LABEL: define {{[^@]+}}@test8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -122,9 +122,9 @@ void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test9(
+// CHECK-LABEL: define {{[^@]+}}@test9(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -135,9 +135,9 @@ void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test10(
+// CHECK-LABEL: define {{[^@]+}}@test10(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -148,9 +148,9 @@ void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test11(
+// CHECK-LABEL: define {{[^@]+}}@test11(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -161,9 +161,9 @@ void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test12(
+// CHECK-LABEL: define {{[^@]+}}@test12(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -174,9 +174,9 @@ void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test13(
+// CHECK-LABEL: define {{[^@]+}}@test13(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -187,10 +187,10 @@ void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test14(
+// CHECK-LABEL: define {{[^@]+}}@test14(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -201,9 +201,9 @@ void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test15(
+// CHECK-LABEL: define {{[^@]+}}@test15(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -214,9 +214,9 @@ void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test16(
+// CHECK-LABEL: define {{[^@]+}}@test16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -227,9 +227,9 @@ void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test17(
+// CHECK-LABEL: define {{[^@]+}}@test17(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -240,9 +240,9 @@ void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test18(
+// CHECK-LABEL: define {{[^@]+}}@test18(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -253,9 +253,9 @@ void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test19(
+// CHECK-LABEL: define {{[^@]+}}@test19(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -266,9 +266,9 @@ void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test20(
+// CHECK-LABEL: define {{[^@]+}}@test20(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -279,10 +279,10 @@ void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test21(
+// CHECK-LABEL: define {{[^@]+}}@test21(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -293,10 +293,10 @@ void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test22(
+// CHECK-LABEL: define {{[^@]+}}@test22(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -307,10 +307,10 @@ void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test23(
+// CHECK-LABEL: define {{[^@]+}}@test23(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -321,10 +321,10 @@ void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test24(
+// CHECK-LABEL: define {{[^@]+}}@test24(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -335,10 +335,10 @@ void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test25(
+// CHECK-LABEL: define {{[^@]+}}@test25(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -349,10 +349,10 @@ void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test26(
+// CHECK-LABEL: define {{[^@]+}}@test26(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -363,10 +363,10 @@ void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test27(
+// CHECK-LABEL: define {{[^@]+}}@test27(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -377,10 +377,10 @@ void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test28(
+// CHECK-LABEL: define {{[^@]+}}@test28(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -391,10 +391,10 @@ void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test29(
+// CHECK-LABEL: define {{[^@]+}}@test29(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -405,10 +405,10 @@ void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test30(
+// CHECK-LABEL: define {{[^@]+}}@test30(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -419,10 +419,10 @@ void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test31(
+// CHECK-LABEL: define {{[^@]+}}@test31(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -433,10 +433,10 @@ void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test32(
+// CHECK-LABEL: define {{[^@]+}}@test32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -447,10 +447,10 @@ void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test33(
+// CHECK-LABEL: define {{[^@]+}}@test33(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -461,10 +461,10 @@ void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test34(
+// CHECK-LABEL: define {{[^@]+}}@test34(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -475,10 +475,10 @@ void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test35(
+// CHECK-LABEL: define {{[^@]+}}@test35(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -489,10 +489,10 @@ void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test36(
+// CHECK-LABEL: define {{[^@]+}}@test36(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -503,10 +503,10 @@ void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test37(
+// CHECK-LABEL: define {{[^@]+}}@test37(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -517,10 +517,10 @@ void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test38(
+// CHECK-LABEL: define {{[^@]+}}@test38(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -531,10 +531,10 @@ void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test39(
+// CHECK-LABEL: define {{[^@]+}}@test39(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -545,10 +545,10 @@ void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test40(
+// CHECK-LABEL: define {{[^@]+}}@test40(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -559,10 +559,10 @@ void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test41(
+// CHECK-LABEL: define {{[^@]+}}@test41(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -573,10 +573,10 @@ void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test42(
+// CHECK-LABEL: define {{[^@]+}}@test42(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -587,10 +587,10 @@ void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test43(
+// CHECK-LABEL: define {{[^@]+}}@test43(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -601,10 +601,10 @@ void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test44(
+// CHECK-LABEL: define {{[^@]+}}@test44(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -615,10 +615,10 @@ void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test45(
+// CHECK-LABEL: define {{[^@]+}}@test45(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -629,10 +629,10 @@ void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test46(
+// CHECK-LABEL: define {{[^@]+}}@test46(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -643,10 +643,10 @@ void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test47(
+// CHECK-LABEL: define {{[^@]+}}@test47(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -657,11 +657,11 @@ void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test48(
+// CHECK-LABEL: define {{[^@]+}}@test48(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -672,11 +672,11 @@ void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test49(
+// CHECK-LABEL: define {{[^@]+}}@test49(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -687,11 +687,11 @@ void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test50(
+// CHECK-LABEL: define {{[^@]+}}@test50(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -702,11 +702,11 @@ void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test51(
+// CHECK-LABEL: define {{[^@]+}}@test51(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -717,11 +717,11 @@ void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test52(
+// CHECK-LABEL: define {{[^@]+}}@test52(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -732,11 +732,11 @@ void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test53(
+// CHECK-LABEL: define {{[^@]+}}@test53(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -747,11 +747,11 @@ void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test54(
+// CHECK-LABEL: define {{[^@]+}}@test54(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -762,11 +762,11 @@ void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test55(
+// CHECK-LABEL: define {{[^@]+}}@test55(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -777,9 +777,9 @@ void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test56(
+// CHECK-LABEL: define {{[^@]+}}@test56(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -790,9 +790,9 @@ void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test57(
+// CHECK-LABEL: define {{[^@]+}}@test57(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -803,10 +803,10 @@ void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test58(
+// CHECK-LABEL: define {{[^@]+}}@test58(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -817,10 +817,10 @@ void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test59(
+// CHECK-LABEL: define {{[^@]+}}@test59(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -831,10 +831,10 @@ void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test60(
+// CHECK-LABEL: define {{[^@]+}}@test60(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -845,10 +845,10 @@ void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test61(
+// CHECK-LABEL: define {{[^@]+}}@test61(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -859,10 +859,10 @@ void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test62(
+// CHECK-LABEL: define {{[^@]+}}@test62(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -873,10 +873,10 @@ void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test63(
+// CHECK-LABEL: define {{[^@]+}}@test63(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -887,10 +887,10 @@ void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test64(
+// CHECK-LABEL: define {{[^@]+}}@test64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -901,10 +901,10 @@ void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test65(
+// CHECK-LABEL: define {{[^@]+}}@test65(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -915,10 +915,10 @@ void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test66(
+// CHECK-LABEL: define {{[^@]+}}@test66(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr noundef [[VPP:%.*]])
-// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr noundef [[VP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]])
 // CHECK-NEXT:    ret void
 //
 void test66(const __vector_pair *vpp, __vector_pair *vp2) {
@@ -926,7 +926,7 @@ void test66(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_vsx_stxvp(vp, 0L, vp2);
 }
 
-// CHECK-LABEL: @test67(
+// CHECK-LABEL: define {{[^@]+}}@test67(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -939,7 +939,7 @@ void test67(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) {
   __builtin_vsx_stxvp(vp, offset, vp2);
 }
 
-// CHECK-LABEL: @test68(
+// CHECK-LABEL: define {{[^@]+}}@test68(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -952,7 +952,7 @@ void test68(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_vsx_stxvp(vp, 18L, vp2);
 }
 
-// CHECK-LABEL: @test69(
+// CHECK-LABEL: define {{[^@]+}}@test69(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -965,7 +965,7 @@ void test69(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_vsx_stxvp(vp, 1L, vp2);
 }
 
-// CHECK-LABEL: @test70(
+// CHECK-LABEL: define {{[^@]+}}@test70(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -978,7 +978,7 @@ void test70(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_vsx_stxvp(vp, 42L, vp2);
 }
 
-// CHECK-LABEL: @test71(
+// CHECK-LABEL: define {{[^@]+}}@test71(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -991,7 +991,7 @@ void test71(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_vsx_stxvp(vp, 32768L, vp2);
 }
 
-// CHECK-LABEL: @test72(
+// CHECK-LABEL: define {{[^@]+}}@test72(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -1004,12 +1004,12 @@ void test72(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_vsx_stxvp(vp, 32799L, vp2);
 }
 
-// CHECK-LABEL: @test73(
+// CHECK-LABEL: define {{[^@]+}}@test73(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> noundef [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -1020,11 +1020,11 @@ void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test74(
+// CHECK-LABEL: define {{[^@]+}}@test74(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr noundef [[VPP:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -1035,12 +1035,12 @@ void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test75(
+// CHECK-LABEL: define {{[^@]+}}@test75(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -1051,9 +1051,9 @@ void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vect
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test76(
+// CHECK-LABEL: define {{[^@]+}}@test76(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> noundef [[VC:%.*]], <16 x i8> noundef [[VC]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
 // CHECK-NEXT:    store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
@@ -1065,7 +1065,7 @@ void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   *((__vector_pair *)resp) = res;
 }
 
-// CHECK-LABEL: @test77(
+// CHECK-LABEL: define {{[^@]+}}@test77(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]])
@@ -1080,10 +1080,10 @@ void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
   __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
 }
 
-// CHECK-LABEL: @test78(
+// CHECK-LABEL: define {{[^@]+}}@test78(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr noundef [[VPP:%.*]])
-// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr noundef [[VP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
+// CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]])
 // CHECK-NEXT:    ret void
 //
 void test78(const __vector_pair *vpp, __vector_pair *vp2) {
@@ -1091,7 +1091,7 @@ void test78(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_mma_stxvp(vp, 0L, vp2);
 }
 
-// CHECK-LABEL: @test79(
+// CHECK-LABEL: define {{[^@]+}}@test79(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -1104,7 +1104,7 @@ void test79(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) {
   __builtin_mma_stxvp(vp, offset, vp2);
 }
 
-// CHECK-LABEL: @test80(
+// CHECK-LABEL: define {{[^@]+}}@test80(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -1117,7 +1117,7 @@ void test80(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_mma_stxvp(vp, 18L, vp2);
 }
 
-// CHECK-LABEL: @test81(
+// CHECK-LABEL: define {{[^@]+}}@test81(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -1130,7 +1130,7 @@ void test81(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_mma_stxvp(vp, 1L, vp2);
 }
 
-// CHECK-LABEL: @test82(
+// CHECK-LABEL: define {{[^@]+}}@test82(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -1143,7 +1143,7 @@ void test82(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_mma_stxvp(vp, 42L, vp2);
 }
 
-// CHECK-LABEL: @test83(
+// CHECK-LABEL: define {{[^@]+}}@test83(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -1156,7 +1156,7 @@ void test83(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_mma_stxvp(vp, 32768L, vp2);
 }
 
-// CHECK-LABEL: @test84(
+// CHECK-LABEL: define {{[^@]+}}@test84(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
@@ -1169,12 +1169,12 @@ void test84(const __vector_pair *vpp, __vector_pair *vp2) {
   __builtin_mma_stxvp(vp, 32799L, vp2);
 }
 
-// CHECK-LABEL: @test85(
+// CHECK-LABEL: define {{[^@]+}}@test85(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> noundef [[VC:%.*]], i32 0, i32 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
 // CHECK-NEXT:    store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -1185,11 +1185,11 @@ void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test86(
+// CHECK-LABEL: define {{[^@]+}}@test86(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr noundef [[VPP:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -1200,12 +1200,12 @@ void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char v
   *((__vector_quad *)resp) = vq;
 }
 
-// CHECK-LABEL: @test87(
+// CHECK-LABEL: define {{[^@]+}}@test87(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]]
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> noundef [[VC:%.*]])
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]])
 // CHECK-NEXT:    store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //

diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c
index a647a4b3f98fc..d6fbaf3eb57da 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c
@@ -10,14 +10,14 @@
 
 long test_ldarx(volatile long* a) {
   // CHECK64-LABEL: @test_ldarx
-  // CHECK64: %0 = tail call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr noundef elementtype(i64) %a)
+  // CHECK64: %0 = tail call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr elementtype(i64) %a)
   // CHECK32-ERROR: error: this builtin is only available on 64-bit targets
   return __ldarx(a);
 }
 
 int test_stdcx(volatile long* addr, long val) {
   // CHECK64-LABEL: @test_stdcx
-  // CHECK64: %0 = tail call i32 @llvm.ppc.stdcx(ptr noundef %addr, i64 noundef %val)
+  // CHECK64: %0 = tail call i32 @llvm.ppc.stdcx(ptr %addr, i64 %val)
   // CHECK32-ERROR: error: this builtin is only available on 64-bit targets
   return __stdcx(addr, val);
 }

diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
index c9268c431286e..e05e35fa98b74 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
@@ -13,34 +13,34 @@
 
 int test_lwarx(volatile int* a) {
   // CHECK-LABEL: @test_lwarx
-  // CHECK: %0 = tail call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr noundef elementtype(i32) %a)
+  // CHECK: %0 = tail call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr elementtype(i32) %a)
   return __lwarx(a);
 }
 
 short test_lharx(volatile short *a) {
   // CHECK-LABEL: @test_lharx
-  // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(ptr noundef elementtype(i16) %a)
+  // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(ptr elementtype(i16) %a)
   // CHECK-NON-PWR8-ERR:  error: this builtin is only valid on POWER8 or later CPUs
   return __lharx(a);
 }
 
 char test_lbarx(volatile char *a) {
   // CHECK-LABEL: @test_lbarx
-  // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr noundef elementtype(i8) %a)
+  // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr elementtype(i8) %a)
   // CHECK-NON-PWR8-ERR:  error: this builtin is only valid on POWER8 or later CPUs
   return __lbarx(a);
 }
 
 int test_stwcx(volatile int* a, int val) {
   // CHECK-LABEL: @test_stwcx
-  // CHECK: %0 = tail call i32 @llvm.ppc.stwcx(ptr noundef %a, i32 noundef %val)
+  // CHECK: %0 = tail call i32 @llvm.ppc.stwcx(ptr %a, i32 %val)
   return __stwcx(a, val);
 }
 
 int test_sthcx(volatile short *a, short val) {
   // CHECK-LABEL: @test_sthcx
   // CHECK: %0 = sext i16 %val to i32
-  // CHECK: %1 = tail call i32 @llvm.ppc.sthcx(ptr noundef %a, i32 %0)
+  // CHECK: %1 = tail call i32 @llvm.ppc.sthcx(ptr %a, i32 %0)
   // CHECK-NON-PWR8-ERR:  error: this builtin is only valid on POWER8 or later CPUs
   return __sthcx(a, val);
 }
@@ -48,14 +48,14 @@ int test_sthcx(volatile short *a, short val) {
 // Extra test cases that previously caused error during usage.
 int test_lharx_intret(volatile short *a) {
   // CHECK-LABEL: @test_lharx_intret
-  // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(ptr noundef elementtype(i16) %a)
+  // CHECK: %0 = tail call i16 asm sideeffect "lharx $0, ${1:y}", "=r,*Z,~{memory}"(ptr elementtype(i16) %a)
   // CHECK-NON-PWR8-ERR:  error: this builtin is only valid on POWER8 or later CPUs
   return __lharx(a);
 }
 
 int test_lbarx_intret(volatile char *a) {
   // CHECK-LABEL: @test_lbarx_intret
-  // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr noundef elementtype(i8) %a)
+  // CHECK: %0 = tail call i8 asm sideeffect "lbarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr elementtype(i8) %a)
   // CHECK-NON-PWR8-ERR:  error: this builtin is only valid on POWER8 or later CPUs
   return __lbarx(a);
 }

diff  --git a/clang/test/CodeGen/PowerPC/ppc64-inline-asm.c b/clang/test/CodeGen/PowerPC/ppc64-inline-asm.c
index 6a42d89c76852..e3f2c2ff5507c 100644
--- a/clang/test/CodeGen/PowerPC/ppc64-inline-asm.c
+++ b/clang/test/CodeGen/PowerPC/ppc64-inline-asm.c
@@ -5,7 +5,7 @@ _Bool test_wc_i1(_Bool b1, _Bool b2) {
   asm("crand %0, %1, %2" : "=wc"(o) : "wc"(b1), "wc"(b2) : );
   return o;
 // CHECK-LABEL: define{{.*}} zeroext i1 @test_wc_i1(i1 noundef zeroext %b1, i1 noundef zeroext %b2)
-// CHECK: call i8 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i1 noundef %b1, i1 noundef %b2)
+// CHECK: call i8 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i1 %b1, i1 %b2)
 }
 
 int test_wc_i32(int b1, int b2) {
@@ -13,7 +13,7 @@ int test_wc_i32(int b1, int b2) {
   asm("crand %0, %1, %2" : "=wc"(o) : "wc"(b1), "wc"(b2) : );
   return o;
 // CHECK-LABEL: signext i32 @test_wc_i32(i32 noundef signext %b1, i32 noundef signext %b2)
-// CHECK: call i32 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i32 noundef %b1, i32 noundef %b2)
+// CHECK: call i32 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i32 %b1, i32 %b2)
 }
 
 unsigned char test_wc_i8(unsigned char b1, unsigned char b2) {
@@ -21,27 +21,27 @@ unsigned char test_wc_i8(unsigned char b1, unsigned char b2) {
   asm("crand %0, %1, %2" : "=wc"(o) : "wc"(b1), "wc"(b2) : );
   return o;
 // CHECK-LABEL: zeroext i8 @test_wc_i8(i8 noundef zeroext %b1, i8 noundef zeroext %b2)
-// CHECK: call i8 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i8 noundef %b1, i8 noundef %b2)
+// CHECK: call i8 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i8 %b1, i8 %b2)
 }
 
 float test_fmaxf(float x, float y) {
   asm("xsmaxdp %x0, %x1, %x2" : "=ww"(x) : "ww"(x), "ww"(y));
   return x;
 // CHECK-LABEL: float @test_fmaxf(float noundef %x, float noundef %y)
-// CHECK: call float asm "xsmaxdp ${0:x}, ${1:x}, ${2:x}", "=^ww,^ww,^ww"(float noundef %x, float noundef %y)
+// CHECK: call float asm "xsmaxdp ${0:x}, ${1:x}, ${2:x}", "=^ww,^ww,^ww"(float %x, float %y)
 }
 
 double test_fmax(double x, double y) {
   asm("xsmaxdp %x0, %x1, %x2" : "=ws"(x) : "ws"(x), "ws"(y));
   return x;
 // CHECK-LABEL: double @test_fmax(double noundef %x, double noundef %y)
-// CHECK: call double asm "xsmaxdp ${0:x}, ${1:x}, ${2:x}", "=^ws,^ws,^ws"(double noundef %x, double noundef %y)
+// CHECK: call double asm "xsmaxdp ${0:x}, ${1:x}, ${2:x}", "=^ws,^ws,^ws"(double %x, double %y)
 }
 
 void testZ(void *addr) {
   asm volatile ("dcbz %y0\n" :: "Z"(*(unsigned char *)addr) : "memory");
 // CHECK-LABEL: void @testZ(ptr noundef %addr)
-// CHECK: call void asm sideeffect "dcbz ${0:y}\0A", "*Z,~{memory}"(ptr noundef elementtype(i8) %addr)
+// CHECK: call void asm sideeffect "dcbz ${0:y}\0A", "*Z,~{memory}"(ptr elementtype(i8) %addr)
 }
 
 void testZwOff(void *addr, long long off) {

diff  --git a/clang/test/CodeGen/RISCV/riscv-inline-asm.c b/clang/test/CodeGen/RISCV/riscv-inline-asm.c
index 8e386d54ad814..48de5ba3a27e3 100644
--- a/clang/test/CodeGen/RISCV/riscv-inline-asm.c
+++ b/clang/test/CodeGen/RISCV/riscv-inline-asm.c
@@ -41,7 +41,7 @@ void test_f(void) {
 
 void test_A(int *p) {
 // CHECK-LABEL: define{{.*}} void @test_A(ptr noundef %p)
-// CHECK: call void asm sideeffect "", "*A"(ptr noundef elementtype(i32) %p)
+// CHECK: call void asm sideeffect "", "*A"(ptr elementtype(i32) %p)
   asm volatile("" :: "A"(*p));
 }
 

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vwrite-csr.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vwrite-csr.c
index 172167925b920..01da208b7bdce 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vwrite-csr.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vwrite-csr.c
@@ -7,7 +7,7 @@
 
 // CHECK-LABEL: @vwrite_csr_vstart(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vstart, ${0:z}", "rJ,~{memory}"(i64 noundef [[VALUE:%.*]]) #[[ATTR1:[0-9]+]], !srcloc !4
+// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vstart, ${0:z}", "rJ,~{memory}"(i64 [[VALUE:%.*]]) #[[ATTR1:[0-9]+]], !srcloc !4
 // CHECK-NEXT:    ret void
 //
 void vwrite_csr_vstart(unsigned long value) {
@@ -16,7 +16,7 @@ void vwrite_csr_vstart(unsigned long value) {
 
 // CHECK-LABEL: @vwrite_csr_vxsat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vxsat, ${0:z}", "rJ,~{memory}"(i64 noundef [[VALUE:%.*]]) #[[ATTR1]], !srcloc !5
+// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vxsat, ${0:z}", "rJ,~{memory}"(i64 [[VALUE:%.*]]) #[[ATTR1]], !srcloc !5
 // CHECK-NEXT:    ret void
 //
 void vwrite_csr_vxsat(unsigned long value) {
@@ -25,7 +25,7 @@ void vwrite_csr_vxsat(unsigned long value) {
 
 // CHECK-LABEL: @vwrite_csr_vxrm(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vxrm, ${0:z}", "rJ,~{memory}"(i64 noundef [[VALUE:%.*]]) #[[ATTR1]], !srcloc !6
+// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vxrm, ${0:z}", "rJ,~{memory}"(i64 [[VALUE:%.*]]) #[[ATTR1]], !srcloc !6
 // CHECK-NEXT:    ret void
 //
 void vwrite_csr_vxrm(unsigned long value) {
@@ -34,7 +34,7 @@ void vwrite_csr_vxrm(unsigned long value) {
 
 // CHECK-LABEL: @vwrite_csr_vcsr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vcsr, ${0:z}", "rJ,~{memory}"(i64 noundef [[VALUE:%.*]]) #[[ATTR1]], !srcloc !7
+// CHECK-NEXT:    tail call void asm sideeffect "csrw\09vcsr, ${0:z}", "rJ,~{memory}"(i64 [[VALUE:%.*]]) #[[ATTR1]], !srcloc !7
 // CHECK-NEXT:    ret void
 //
 void vwrite_csr_vcsr(unsigned long value) {

diff  --git a/clang/test/CodeGen/X86/fma-builtins-constrained.c b/clang/test/CodeGen/X86/fma-builtins-constrained.c
index e3b541e95de85..74177ef7d1c9d 100644
--- a/clang/test/CodeGen/X86/fma-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/fma-builtins-constrained.c
@@ -15,16 +15,16 @@
 
 __m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
   // COMMON-LABEL: test_mm_fmadd_ps
-  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}})
-  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmadd213ps
   return _mm_fmadd_ps(a, b, c);
 }
 
 __m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) {
   // COMMON-LABEL: test_mm_fmadd_pd
-  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}})
-  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmadd213pd
   return _mm_fmadd_pd(a, b, c);
 }
@@ -56,8 +56,8 @@ __m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) {
 __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
   // COMMON-LABEL: test_mm_fmsub_ps
   // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
-  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> %{{.*}})
-  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmsub213ps
   return _mm_fmsub_ps(a, b, c);
 }
@@ -65,8 +65,8 @@ __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
 __m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) {
   // COMMON-LABEL: test_mm_fmsub_pd
   // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
-  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> %{{.*}})
-  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmsub213pd
   return _mm_fmsub_pd(a, b, c);
 }
@@ -100,8 +100,8 @@ __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
 __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
   // COMMON-LABEL: test_mm_fnmadd_ps
   // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
-  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}})
-  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmadd213ps
   return _mm_fnmadd_ps(a, b, c);
 }
@@ -109,8 +109,8 @@ __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
 __m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) {
   // COMMON-LABEL: test_mm_fnmadd_pd
   // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
-  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}})
-  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmadd213pd
   return _mm_fnmadd_pd(a, b, c);
 }
@@ -145,8 +145,8 @@ __m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
   // COMMON-LABEL: test_mm_fnmsub_ps
   // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
   // COMMONIR: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
-  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> %{{.*}})
-  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmsub213ps
   return _mm_fnmsub_ps(a, b, c);
 }
@@ -155,8 +155,8 @@ __m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) {
   // COMMON-LABEL: test_mm_fnmsub_pd
   // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
   // COMMONIR: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
-  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> %{{.*}})
-  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmsub213pd
   return _mm_fnmsub_pd(a, b, c);
 }
@@ -192,7 +192,7 @@ __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
 __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
   // COMMON-LABEL: test_mm_fmaddsub_ps
   // COMMONIR-NOT: fneg
-  // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}})
+  // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
   // CHECK-ASM: vfmaddsub213ps
   return _mm_fmaddsub_ps(a, b, c);
 }
@@ -200,7 +200,7 @@ __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
 __m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
   // COMMON-LABEL: test_mm_fmaddsub_pd
   // COMMONIR-NOT: fneg
-  // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}})
+  // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
   // CHECK-ASM: vfmaddsub213pd
   return _mm_fmaddsub_pd(a, b, c);
 }
@@ -208,7 +208,7 @@ __m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
 __m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
   // COMMON-LABEL: test_mm_fmsubadd_ps
   // COMMONIR: [[FNEG:%.+]] = fneg <4 x float> %{{.*}}
-  // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> noundef %{{.*}}, <4 x float> noundef %{{.*}}, <4 x float> [[FNEG]])
+  // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[FNEG]])
   // CHECK-ASM: vfmsubadd213ps
   return _mm_fmsubadd_ps(a, b, c);
 }
@@ -216,23 +216,23 @@ __m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
 __m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) {
   // COMMON-LABEL: test_mm_fmsubadd_pd
   // COMMONIR: [[FNEG:%.+]] = fneg <2 x double> %{{.*}}
-  // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> noundef %{{.*}}, <2 x double> noundef %{{.*}}, <2 x double> [[FNEG]])
+  // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[FNEG]])
   // CHECK-ASM: vfmsubadd213pd
   return _mm_fmsubadd_pd(a, b, c);
 }
 
 __m256 test_mm256_fmadd_ps(__m256 a, __m256 b, __m256 c) {
   // COMMON-LABEL: test_mm256_fmadd_ps
-  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}})
-  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmadd213ps
   return _mm256_fmadd_ps(a, b, c);
 }
 
 __m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) {
   // COMMON-LABEL: test_mm256_fmadd_pd
-  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}})
-  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
+  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmadd213pd
   return _mm256_fmadd_pd(a, b, c);
 }
@@ -240,8 +240,8 @@ __m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) {
 __m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) {
   // COMMON-LABEL: test_mm256_fmsub_ps
   // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
-  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> %{{.*}})
-  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmsub213ps
   return _mm256_fmsub_ps(a, b, c);
 }
@@ -249,8 +249,8 @@ __m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) {
 __m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) {
   // COMMON-LABEL: test_mm256_fmsub_pd
   // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
-  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> %{{.*}})
-  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
+  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfmsub213pd
   return _mm256_fmsub_pd(a, b, c);
 }
@@ -258,8 +258,8 @@ __m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) {
 __m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) {
   // COMMON-LABEL: test_mm256_fnmadd_ps
   // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
-  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}})
-  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmadd213ps
   return _mm256_fnmadd_ps(a, b, c);
 }
@@ -267,8 +267,8 @@ __m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) {
 __m256d test_mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c) {
   // COMMON-LABEL: test_mm256_fnmadd_pd
   // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
-  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}})
-  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
+  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmadd213pd
   return _mm256_fnmadd_pd(a, b, c);
 }
@@ -277,8 +277,8 @@ __m256 test_mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c) {
   // COMMON-LABEL: test_mm256_fnmsub_ps
   // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
   // COMMONIR: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
-  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> %{{.*}})
-  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmsub213ps
   return _mm256_fnmsub_ps(a, b, c);
 }
@@ -287,8 +287,8 @@ __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
   // COMMON-LABEL: test_mm256_fnmsub_pd
   // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
   // COMMONIR: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
-  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> %{{.*}})
-  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
+  // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
+  // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
   // CHECK-ASM: vfnmsub213pd
   return _mm256_fnmsub_pd(a, b, c);
 }
@@ -296,7 +296,7 @@ __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
 __m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
   // COMMON-LABEL: test_mm256_fmaddsub_ps
   // COMMONIR-NOT: fneg
-  // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}})
+  // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
   // CHECK-ASM: vfmaddsub213ps
   return _mm256_fmaddsub_ps(a, b, c);
 }
@@ -304,7 +304,7 @@ __m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
 __m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
   // COMMON-LABEL: test_mm256_fmaddsub_pd
   // COMMONIR-NOT: fneg
-  // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}})
+  // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
   // CHECK-ASM: vfmaddsub213pd
   return _mm256_fmaddsub_pd(a, b, c);
 }
@@ -312,7 +312,7 @@ __m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
 __m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
   // COMMON-LABEL: test_mm256_fmsubadd_ps
   // COMMONIR: [[FNEG:%.+]] = fneg <8 x float> %{{.*}}
-  // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> noundef %{{.*}}, <8 x float> noundef %{{.*}}, <8 x float> [[FNEG]])
+  // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[FNEG]])
   // CHECK-ASM: vfmsubadd213ps
   return _mm256_fmsubadd_ps(a, b, c);
 }
@@ -320,7 +320,7 @@ __m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
 __m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
   // COMMON-LABEL: test_mm256_fmsubadd_pd
   // COMMONIR: [[FNEG:%.+]] = fneg <4 x double> %{{.*}}
-  // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> noundef %{{.*}}, <4 x double> noundef %{{.*}}, <4 x double> [[FNEG]])
+  // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[FNEG]])
   // CHECK-ASM: vfmsubadd213pd
   return _mm256_fmsubadd_pd(a, b, c);
 }

diff  --git a/clang/test/CodeGen/X86/ms-x86-intrinsics.c b/clang/test/CodeGen/X86/ms-x86-intrinsics.c
index 61f01b34b0183..b4b8f5dc0b8c5 100644
--- a/clang/test/CodeGen/X86/ms-x86-intrinsics.c
+++ b/clang/test/CodeGen/X86/ms-x86-intrinsics.c
@@ -147,7 +147,7 @@ unsigned __int64 test__shiftleft128(unsigned __int64 l, unsigned __int64 h,
 }
 // CHECK-X64-LABEL: define dso_local i64 @test__shiftleft128(i64 noundef %l, i64 noundef %h, i8 noundef %d)
 // CHECK-X64: = zext i8 %{{.*}} to i64
-// CHECK-X64: = tail call i64 @llvm.fshl.i64(i64 noundef %h, i64 noundef %l, i64 %{{.*}})
+// CHECK-X64: = tail call i64 @llvm.fshl.i64(i64 %h, i64 %l, i64 %{{.*}})
 // CHECK-X64:  ret i64 %
 
 unsigned __int64 test__shiftright128(unsigned __int64 l, unsigned __int64 h,
@@ -156,7 +156,7 @@ unsigned __int64 test__shiftright128(unsigned __int64 l, unsigned __int64 h,
 }
 // CHECK-X64-LABEL: define dso_local i64 @test__shiftright128(i64 noundef %l, i64 noundef %h, i8 noundef %d)
 // CHECK-X64: = zext i8 %{{.*}} to i64
-// CHECK-X64: = tail call i64 @llvm.fshr.i64(i64 noundef %h, i64 noundef %l, i64 %{{.*}})
+// CHECK-X64: = tail call i64 @llvm.fshr.i64(i64 %h, i64 %l, i64 %{{.*}})
 // CHECK-X64:  ret i64 %
 
 #endif // defined(__x86_64__)

diff  --git a/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c b/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c
index e6791d9782e7b..84d7fd05c276e 100644
--- a/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c
@@ -8,29 +8,19 @@
 
 #include "arm_neon.h"
 
-// CHECK64-LABEL: @test_vld1_bf16(
-// CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[TMP0:%.*]] = load <4 x bfloat>, ptr [[PTR:%.*]], align 2
-// CHECK64-NEXT:    ret <4 x bfloat> [[TMP0]]
-//
-// CHECK32-LABEL: @test_vld1_bf16(
-// CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1:%.*]] = load <4 x bfloat>, ptr [[PTR:%.*]], align 2
-// CHECK32-NEXT:    ret <4 x bfloat> [[VLD1]]
+// CHECK-LABEL: @test_vld1_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x bfloat>, ptr [[PTR:%.*]], align 2
+// CHECK-NEXT:    ret <4 x bfloat> [[TMP1]]
 //
 bfloat16x4_t test_vld1_bf16(bfloat16_t const *ptr) {
   return vld1_bf16(ptr);
 }
 
-// CHECK64-LABEL: @test_vld1q_bf16(
-// CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[TMP0:%.*]] = load <8 x bfloat>, ptr [[PTR:%.*]], align 2
-// CHECK64-NEXT:    ret <8 x bfloat> [[TMP0]]
-//
-// CHECK32-LABEL: @test_vld1q_bf16(
-// CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1:%.*]] = load <8 x bfloat>, ptr [[PTR:%.*]], align 2
-// CHECK32-NEXT:    ret <8 x bfloat> [[VLD1]]
+// CHECK-LABEL: @test_vld1q_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x bfloat>, ptr [[PTR:%.*]], align 2
+// CHECK-NEXT:    ret <8 x bfloat> [[TMP1]]
 //
 bfloat16x8_t test_vld1q_bf16(bfloat16_t const *ptr) {
   return vld1q_bf16(ptr);
@@ -69,7 +59,7 @@ bfloat16x4_t test_vld1_dup_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld1_bf16_x2(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x2.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x2.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 0
 // CHECK64-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X4X2_T:%.*]] poison, <4 x bfloat> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
@@ -78,7 +68,7 @@ bfloat16x4_t test_vld1_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld1_bf16_x2(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x2.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x2.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK32-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 0
 // CHECK32-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[VLD1XN_FCA_0_EXTRACT]] to <2 x i32>
@@ -93,7 +83,7 @@ bfloat16x4x2_t test_vld1_bf16_x2(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld1q_bf16_x2(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x2.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x2.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 0
 // CHECK64-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X8X2_T:%.*]] poison, <8 x bfloat> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
@@ -102,7 +92,7 @@ bfloat16x4x2_t test_vld1_bf16_x2(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld1q_bf16_x2(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x2.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x2.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK32-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 0
 // CHECK32-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[VLD1XN_FCA_0_EXTRACT]] to <4 x i32>
@@ -117,7 +107,7 @@ bfloat16x8x2_t test_vld1q_bf16_x2(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld1_bf16_x3(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x3.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x3.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 0
 // CHECK64-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 1
 // CHECK64-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 2
@@ -128,7 +118,7 @@ bfloat16x8x2_t test_vld1q_bf16_x2(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld1_bf16_x3(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x3.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x3.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK32-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 0
 // CHECK32-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 1
 // CHECK32-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 2
@@ -146,7 +136,7 @@ bfloat16x4x3_t test_vld1_bf16_x3(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld1q_bf16_x3(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x3.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x3.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 0
 // CHECK64-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 1
 // CHECK64-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 2
@@ -157,7 +147,7 @@ bfloat16x4x3_t test_vld1_bf16_x3(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld1q_bf16_x3(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x3.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x3.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK32-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 0
 // CHECK32-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 1
 // CHECK32-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 2
@@ -175,7 +165,7 @@ bfloat16x8x3_t test_vld1q_bf16_x3(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld1_bf16_x4(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x4.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x4.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 0
 // CHECK64-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 1
 // CHECK64-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 2
@@ -188,7 +178,7 @@ bfloat16x8x3_t test_vld1q_bf16_x3(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld1_bf16_x4(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x4.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x4.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK32-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 0
 // CHECK32-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 1
 // CHECK32-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD1XN]], 2
@@ -209,7 +199,7 @@ bfloat16x4x4_t test_vld1_bf16_x4(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld1q_bf16_x4(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x4.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x4.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 0
 // CHECK64-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 1
 // CHECK64-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 2
@@ -222,7 +212,7 @@ bfloat16x4x4_t test_vld1_bf16_x4(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld1q_bf16_x4(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x4.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK32-NEXT:    [[VLD1XN:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x4.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK32-NEXT:    [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 0
 // CHECK32-NEXT:    [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 1
 // CHECK32-NEXT:    [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD1XN]], 2
@@ -254,7 +244,7 @@ bfloat16x8_t test_vld1q_dup_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld2_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2]], 0
 // CHECK64-NEXT:    [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X4X2_T:%.*]] poison, <4 x bfloat> [[VLD2_FCA_0_EXTRACT]], 0, 0
@@ -263,13 +253,13 @@ bfloat16x8_t test_vld1q_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld2_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD2_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2.v4bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD2_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2.v4bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_V]], 0
 // CHECK32-NEXT:    [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_V]], 1
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[VLD2_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD2_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <2 x i32>] poison, <2 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP1]], 1
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD2_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD2_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <2 x i32>] poison, <2 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP2]], 1
 // CHECK32-NEXT:    ret [2 x <2 x i32>] [[DOTFCA_1_INSERT]]
 //
 bfloat16x4x2_t test_vld2_bf16(bfloat16_t const *ptr) {
@@ -278,7 +268,7 @@ bfloat16x4x2_t test_vld2_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld2q_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2]], 0
 // CHECK64-NEXT:    [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X8X2_T:%.*]] poison, <8 x bfloat> [[VLD2_FCA_0_EXTRACT]], 0, 0
@@ -287,13 +277,13 @@ bfloat16x4x2_t test_vld2_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld2q_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD2Q_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2.v8bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD2Q_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2.v8bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2Q_V]], 0
 // CHECK32-NEXT:    [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2Q_V]], 1
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[VLD2Q_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD2Q_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <4 x i32>] poison, <4 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP1]], 1
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD2Q_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD2Q_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <4 x i32>] poison, <4 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP2]], 1
 // CHECK32-NEXT:    ret [2 x <4 x i32>] [[DOTFCA_1_INSERT]]
 //
 bfloat16x8x2_t test_vld2q_bf16(bfloat16_t const *ptr) {
@@ -304,7 +294,7 @@ bfloat16x8x2_t test_vld2q_bf16(bfloat16_t const *ptr) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[SRC_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[SRC_COERCE]], 1
-// CHECK64-NEXT:    [[VLD2_LANE:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2lane.v4bf16.p0(<4 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], i64 1, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD2_LANE:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2lane.v4bf16.p0(<4 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], i64 1, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_LANE]], 0
 // CHECK64-NEXT:    [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_LANE]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X4X2_T:%.*]] poison, <4 x bfloat> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
@@ -317,13 +307,13 @@ bfloat16x8x2_t test_vld2q_bf16(bfloat16_t const *ptr) {
 // CHECK32-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[SRC_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    [[VLD2_LANE_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2lane.v4bf16.p0(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], i32 1, i32 2)
+// CHECK32-NEXT:    [[VLD2_LANE_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2lane.v4bf16.p0(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], i32 1, i32 2)
 // CHECK32-NEXT:    [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_LANE_V]], 0
 // CHECK32-NEXT:    [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_LANE_V]], 1
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD2_LANE_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD2_LANE_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <2 x i32>] poison, <2 x i32> [[TMP2]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP3]], 1
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD2_LANE_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <4 x bfloat> [[VLD2_LANE_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <2 x i32>] poison, <2 x i32> [[TMP3]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP4]], 1
 // CHECK32-NEXT:    ret [2 x <2 x i32>] [[DOTFCA_1_INSERT]]
 //
 bfloat16x4x2_t test_vld2_lane_bf16(bfloat16_t const *ptr, bfloat16x4x2_t src) {
@@ -334,7 +324,7 @@ bfloat16x4x2_t test_vld2_lane_bf16(bfloat16_t const *ptr, bfloat16x4x2_t src) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[SRC_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[SRC_COERCE]], 1
-// CHECK64-NEXT:    [[VLD2_LANE:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2lane.v8bf16.p0(<8 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], i64 7, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD2_LANE:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2lane.v8bf16.p0(<8 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2_LANE]], 0
 // CHECK64-NEXT:    [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2_LANE]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X8X2_T:%.*]] poison, <8 x bfloat> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
@@ -347,13 +337,13 @@ bfloat16x4x2_t test_vld2_lane_bf16(bfloat16_t const *ptr, bfloat16x4x2_t src) {
 // CHECK32-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[SRC_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    [[VLD2Q_LANE_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2lane.v8bf16.p0(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], i32 7, i32 2)
+// CHECK32-NEXT:    [[VLD2Q_LANE_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2lane.v8bf16.p0(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], i32 7, i32 2)
 // CHECK32-NEXT:    [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2Q_LANE_V]], 0
 // CHECK32-NEXT:    [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2Q_LANE_V]], 1
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD2Q_LANE_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD2Q_LANE_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <4 x i32>] poison, <4 x i32> [[TMP2]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP3]], 1
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD2Q_LANE_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <8 x bfloat> [[VLD2Q_LANE_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <4 x i32>] poison, <4 x i32> [[TMP3]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP4]], 1
 // CHECK32-NEXT:    ret [2 x <4 x i32>] [[DOTFCA_1_INSERT]]
 //
 bfloat16x8x2_t test_vld2q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x2_t src) {
@@ -362,7 +352,7 @@ bfloat16x8x2_t test_vld2q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x2_t src) {
 
 // CHECK64-LABEL: @test_vld3_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3]], 0
 // CHECK64-NEXT:    [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3]], 1
 // CHECK64-NEXT:    [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3]], 2
@@ -373,16 +363,16 @@ bfloat16x8x2_t test_vld2q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x2_t src) {
 //
 // CHECK32-LABEL: @test_vld3_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD3_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3.v4bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD3_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3.v4bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_V]], 0
 // CHECK32-NEXT:    [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_V]], 1
 // CHECK32-NEXT:    [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_V]], 2
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[VLD3_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD3_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD3_V_FCA_2_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <2 x i32>] poison, <2 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP2]], 2
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD3_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD3_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD3_V_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <2 x i32>] poison, <2 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP3]], 2
 // CHECK32-NEXT:    ret [3 x <2 x i32>] [[DOTFCA_2_INSERT]]
 //
 bfloat16x4x3_t test_vld3_bf16(bfloat16_t const *ptr) {
@@ -391,7 +381,7 @@ bfloat16x4x3_t test_vld3_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld3q_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3]], 0
 // CHECK64-NEXT:    [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3]], 1
 // CHECK64-NEXT:    [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3]], 2
@@ -402,16 +392,16 @@ bfloat16x4x3_t test_vld3_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld3q_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD3Q_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3.v8bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD3Q_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3.v8bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_V]], 0
 // CHECK32-NEXT:    [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_V]], 1
 // CHECK32-NEXT:    [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_V]], 2
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[VLD3Q_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD3Q_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD3Q_V_FCA_2_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <4 x i32>] poison, <4 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP2]], 2
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD3Q_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD3Q_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD3Q_V_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <4 x i32>] poison, <4 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP3]], 2
 // CHECK32-NEXT:    ret [3 x <4 x i32>] [[DOTFCA_2_INSERT]]
 //
 bfloat16x8x3_t test_vld3q_bf16(bfloat16_t const *ptr) {
@@ -423,7 +413,7 @@ bfloat16x8x3_t test_vld3q_bf16(bfloat16_t const *ptr) {
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[SRC_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[SRC_COERCE]], 1
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[SRC_COERCE]], 2
-// CHECK64-NEXT:    [[VLD3_LANE:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3lane.v4bf16.p0(<4 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], i64 1, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD3_LANE:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3lane.v4bf16.p0(<4 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], i64 1, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_LANE]], 0
 // CHECK64-NEXT:    [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_LANE]], 1
 // CHECK64-NEXT:    [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_LANE]], 2
@@ -440,16 +430,16 @@ bfloat16x8x3_t test_vld3q_bf16(bfloat16_t const *ptr) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    [[VLD3_LANE_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3lane.v4bf16.p0(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], i32 1, i32 2)
+// CHECK32-NEXT:    [[VLD3_LANE_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3lane.v4bf16.p0(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], i32 1, i32 2)
 // CHECK32-NEXT:    [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_LANE_V]], 0
 // CHECK32-NEXT:    [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_LANE_V]], 1
 // CHECK32-NEXT:    [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_LANE_V]], 2
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD3_LANE_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <4 x bfloat> [[VLD3_LANE_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <4 x bfloat> [[VLD3_LANE_V_FCA_2_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <2 x i32>] poison, <2 x i32> [[TMP3]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP4]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP5]], 2
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <4 x bfloat> [[VLD3_LANE_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <4 x bfloat> [[VLD3_LANE_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP6:%.*]] = bitcast <4 x bfloat> [[VLD3_LANE_V_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <2 x i32>] poison, <2 x i32> [[TMP4]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP5]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP6]], 2
 // CHECK32-NEXT:    ret [3 x <2 x i32>] [[DOTFCA_2_INSERT]]
 //
 bfloat16x4x3_t test_vld3_lane_bf16(bfloat16_t const *ptr, bfloat16x4x3_t src) {
@@ -461,7 +451,7 @@ bfloat16x4x3_t test_vld3_lane_bf16(bfloat16_t const *ptr, bfloat16x4x3_t src) {
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[SRC_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[SRC_COERCE]], 1
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[SRC_COERCE]], 2
-// CHECK64-NEXT:    [[VLD3_LANE:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3lane.v8bf16.p0(<8 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], i64 7, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD3_LANE:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3lane.v8bf16.p0(<8 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3_LANE]], 0
 // CHECK64-NEXT:    [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3_LANE]], 1
 // CHECK64-NEXT:    [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3_LANE]], 2
@@ -478,16 +468,16 @@ bfloat16x4x3_t test_vld3_lane_bf16(bfloat16_t const *ptr, bfloat16x4x3_t src) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    [[VLD3Q_LANE_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3lane.v8bf16.p0(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], i32 7, i32 2)
+// CHECK32-NEXT:    [[VLD3Q_LANE_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3lane.v8bf16.p0(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], i32 7, i32 2)
 // CHECK32-NEXT:    [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_LANE_V]], 0
 // CHECK32-NEXT:    [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_LANE_V]], 1
 // CHECK32-NEXT:    [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_LANE_V]], 2
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD3Q_LANE_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <8 x bfloat> [[VLD3Q_LANE_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <8 x bfloat> [[VLD3Q_LANE_V_FCA_2_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <4 x i32>] poison, <4 x i32> [[TMP3]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP4]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP5]], 2
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <8 x bfloat> [[VLD3Q_LANE_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <8 x bfloat> [[VLD3Q_LANE_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP6:%.*]] = bitcast <8 x bfloat> [[VLD3Q_LANE_V_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <4 x i32>] poison, <4 x i32> [[TMP4]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP5]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP6]], 2
 // CHECK32-NEXT:    ret [3 x <4 x i32>] [[DOTFCA_2_INSERT]]
 //
 bfloat16x8x3_t test_vld3q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x3_t src) {
@@ -497,7 +487,7 @@ bfloat16x8x3_t test_vld3q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x3_t src) {
 
 // CHECK64-LABEL: @test_vld4_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4]], 0
 // CHECK64-NEXT:    [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4]], 1
 // CHECK64-NEXT:    [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4]], 2
@@ -510,19 +500,19 @@ bfloat16x8x3_t test_vld3q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x3_t src) {
 //
 // CHECK32-LABEL: @test_vld4_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD4_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4.v4bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD4_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4.v4bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_V]], 0
 // CHECK32-NEXT:    [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_V]], 1
 // CHECK32-NEXT:    [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_V]], 2
 // CHECK32-NEXT:    [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_V]], 3
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_2_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_3_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <2 x i32>] poison, <2 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP2]], 2
-// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_2_INSERT]], <2 x i32> [[TMP3]], 3
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <4 x bfloat> [[VLD4_V_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <2 x i32>] poison, <2 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP3]], 2
+// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_2_INSERT]], <2 x i32> [[TMP4]], 3
 // CHECK32-NEXT:    ret [4 x <2 x i32>] [[DOTFCA_3_INSERT]]
 //
 bfloat16x4x4_t test_vld4_bf16(bfloat16_t const *ptr) {
@@ -531,7 +521,7 @@ bfloat16x4x4_t test_vld4_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld4q_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4]], 0
 // CHECK64-NEXT:    [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4]], 1
 // CHECK64-NEXT:    [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4]], 2
@@ -544,19 +534,19 @@ bfloat16x4x4_t test_vld4_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld4q_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD4Q_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4.v8bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD4Q_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4.v8bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_V]], 0
 // CHECK32-NEXT:    [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_V]], 1
 // CHECK32-NEXT:    [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_V]], 2
 // CHECK32-NEXT:    [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_V]], 3
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_2_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_3_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <4 x i32>] poison, <4 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP2]], 2
-// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_2_INSERT]], <4 x i32> [[TMP3]], 3
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <8 x bfloat> [[VLD4Q_V_FCA_3_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <4 x i32>] poison, <4 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP3]], 2
+// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_2_INSERT]], <4 x i32> [[TMP4]], 3
 // CHECK32-NEXT:    ret [4 x <4 x i32>] [[DOTFCA_3_INSERT]]
 //
 bfloat16x8x4_t test_vld4q_bf16(bfloat16_t const *ptr) {
@@ -569,7 +559,7 @@ bfloat16x8x4_t test_vld4q_bf16(bfloat16_t const *ptr) {
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[SRC_COERCE]], 1
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[SRC_COERCE]], 2
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[SRC_COERCE]], 3
-// CHECK64-NEXT:    [[VLD4_LANE:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4lane.v4bf16.p0(<4 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_3_EXTRACT]], i64 1, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD4_LANE:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4lane.v4bf16.p0(<4 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[SRC_COERCE_FCA_3_EXTRACT]], i64 1, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_LANE]], 0
 // CHECK64-NEXT:    [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_LANE]], 1
 // CHECK64-NEXT:    [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_LANE]], 2
@@ -590,19 +580,19 @@ bfloat16x8x4_t test_vld4q_bf16(bfloat16_t const *ptr) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[SRC_COERCE_FCA_3_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    [[VLD4_LANE_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4lane.v4bf16.p0(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]], i32 1, i32 2)
+// CHECK32-NEXT:    [[VLD4_LANE_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4lane.v4bf16.p0(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]], i32 1, i32 2)
 // CHECK32-NEXT:    [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_LANE_V]], 0
 // CHECK32-NEXT:    [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_LANE_V]], 1
 // CHECK32-NEXT:    [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_LANE_V]], 2
 // CHECK32-NEXT:    [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_LANE_V]], 3
-// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP6:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_2_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP7:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_3_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <2 x i32>] poison, <2 x i32> [[TMP4]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP5]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP6]], 2
-// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_2_INSERT]], <2 x i32> [[TMP7]], 3
+// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP6:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP7:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP8:%.*]] = bitcast <4 x bfloat> [[VLD4_LANE_V_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <2 x i32>] poison, <2 x i32> [[TMP5]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP6]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP7]], 2
+// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_2_INSERT]], <2 x i32> [[TMP8]], 3
 // CHECK32-NEXT:    ret [4 x <2 x i32>] [[DOTFCA_3_INSERT]]
 //
 bfloat16x4x4_t test_vld4_lane_bf16(bfloat16_t const *ptr, bfloat16x4x4_t src) {
@@ -615,7 +605,7 @@ bfloat16x4x4_t test_vld4_lane_bf16(bfloat16_t const *ptr, bfloat16x4x4_t src) {
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[SRC_COERCE]], 1
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[SRC_COERCE]], 2
 // CHECK64-NEXT:    [[SRC_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[SRC_COERCE]], 3
-// CHECK64-NEXT:    [[VLD4_LANE:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4lane.v8bf16.p0(<8 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_3_EXTRACT]], i64 7, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD4_LANE:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4lane.v8bf16.p0(<8 x bfloat> [[SRC_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[SRC_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4_LANE]], 0
 // CHECK64-NEXT:    [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4_LANE]], 1
 // CHECK64-NEXT:    [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4_LANE]], 2
@@ -636,19 +626,19 @@ bfloat16x4x4_t test_vld4_lane_bf16(bfloat16_t const *ptr, bfloat16x4x4_t src) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[SRC_COERCE_FCA_3_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    [[VLD4Q_LANE_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4lane.v8bf16.p0(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]], i32 7, i32 2)
+// CHECK32-NEXT:    [[VLD4Q_LANE_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4lane.v8bf16.p0(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]], i32 7, i32 2)
 // CHECK32-NEXT:    [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_LANE_V]], 0
 // CHECK32-NEXT:    [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_LANE_V]], 1
 // CHECK32-NEXT:    [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_LANE_V]], 2
 // CHECK32-NEXT:    [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_LANE_V]], 3
-// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP6:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_2_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP7:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_3_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <4 x i32>] poison, <4 x i32> [[TMP4]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP5]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP6]], 2
-// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_2_INSERT]], <4 x i32> [[TMP7]], 3
+// CHECK32-NEXT:    [[TMP5:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP6:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP7:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP8:%.*]] = bitcast <8 x bfloat> [[VLD4Q_LANE_V_FCA_3_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <4 x i32>] poison, <4 x i32> [[TMP5]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP6]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP7]], 2
+// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_2_INSERT]], <4 x i32> [[TMP8]], 3
 // CHECK32-NEXT:    ret [4 x <4 x i32>] [[DOTFCA_3_INSERT]]
 //
 bfloat16x8x4_t test_vld4q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x4_t src) {
@@ -657,7 +647,7 @@ bfloat16x8x4_t test_vld4q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x4_t src) {
 
 // CHECK64-LABEL: @test_vld2_dup_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2r.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2r.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2]], 0
 // CHECK64-NEXT:    [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X4X2_T:%.*]] poison, <4 x bfloat> [[VLD2_FCA_0_EXTRACT]], 0, 0
@@ -666,13 +656,13 @@ bfloat16x8x4_t test_vld4q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x4_t src) {
 //
 // CHECK32-LABEL: @test_vld2_dup_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD2_DUP_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2dup.v4bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD2_DUP_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2dup.v4bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD2_DUP_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_DUP_V]], 0
 // CHECK32-NEXT:    [[VLD2_DUP_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat> } [[VLD2_DUP_V]], 1
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[VLD2_DUP_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD2_DUP_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <2 x i32>] poison, <2 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP1]], 1
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD2_DUP_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD2_DUP_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <2 x i32>] poison, <2 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP2]], 1
 // CHECK32-NEXT:    ret [2 x <2 x i32>] [[DOTFCA_1_INSERT]]
 //
 bfloat16x4x2_t test_vld2_dup_bf16(bfloat16_t const *ptr) {
@@ -681,7 +671,7 @@ bfloat16x4x2_t test_vld2_dup_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld2q_dup_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2r.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD2:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2r.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2]], 0
 // CHECK64-NEXT:    [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2]], 1
 // CHECK64-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_BFLOAT16X8X2_T:%.*]] poison, <8 x bfloat> [[VLD2_FCA_0_EXTRACT]], 0, 0
@@ -690,13 +680,13 @@ bfloat16x4x2_t test_vld2_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld2q_dup_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD2Q_DUP_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2dup.v8bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD2Q_DUP_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2dup.v8bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD2Q_DUP_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2Q_DUP_V]], 0
 // CHECK32-NEXT:    [[VLD2Q_DUP_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat> } [[VLD2Q_DUP_V]], 1
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[VLD2Q_DUP_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD2Q_DUP_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <4 x i32>] poison, <4 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP1]], 1
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD2Q_DUP_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD2Q_DUP_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <4 x i32>] poison, <4 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP2]], 1
 // CHECK32-NEXT:    ret [2 x <4 x i32>] [[DOTFCA_1_INSERT]]
 //
 bfloat16x8x2_t test_vld2q_dup_bf16(bfloat16_t const *ptr) {
@@ -705,7 +695,7 @@ bfloat16x8x2_t test_vld2q_dup_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld3_dup_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3r.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3r.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3]], 0
 // CHECK64-NEXT:    [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3]], 1
 // CHECK64-NEXT:    [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3]], 2
@@ -716,16 +706,16 @@ bfloat16x8x2_t test_vld2q_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld3_dup_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD3_DUP_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3dup.v4bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD3_DUP_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3dup.v4bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD3_DUP_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_DUP_V]], 0
 // CHECK32-NEXT:    [[VLD3_DUP_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_DUP_V]], 1
 // CHECK32-NEXT:    [[VLD3_DUP_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD3_DUP_V]], 2
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[VLD3_DUP_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD3_DUP_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD3_DUP_V_FCA_2_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <2 x i32>] poison, <2 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP2]], 2
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD3_DUP_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD3_DUP_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD3_DUP_V_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <2 x i32>] poison, <2 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP3]], 2
 // CHECK32-NEXT:    ret [3 x <2 x i32>] [[DOTFCA_2_INSERT]]
 //
 bfloat16x4x3_t test_vld3_dup_bf16(bfloat16_t const *ptr) {
@@ -734,7 +724,7 @@ bfloat16x4x3_t test_vld3_dup_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld3q_dup_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3r.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD3:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3r.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3]], 0
 // CHECK64-NEXT:    [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3]], 1
 // CHECK64-NEXT:    [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3]], 2
@@ -745,16 +735,16 @@ bfloat16x4x3_t test_vld3_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld3q_dup_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD3Q_DUP_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3dup.v8bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD3Q_DUP_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3dup.v8bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD3Q_DUP_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_DUP_V]], 0
 // CHECK32-NEXT:    [[VLD3Q_DUP_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_DUP_V]], 1
 // CHECK32-NEXT:    [[VLD3Q_DUP_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD3Q_DUP_V]], 2
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[VLD3Q_DUP_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD3Q_DUP_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD3Q_DUP_V_FCA_2_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <4 x i32>] poison, <4 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP2]], 2
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD3Q_DUP_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD3Q_DUP_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD3Q_DUP_V_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <4 x i32>] poison, <4 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP3]], 2
 // CHECK32-NEXT:    ret [3 x <4 x i32>] [[DOTFCA_2_INSERT]]
 //
 bfloat16x8x3_t test_vld3q_dup_bf16(bfloat16_t const *ptr) {
@@ -763,7 +753,7 @@ bfloat16x8x3_t test_vld3q_dup_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld4_dup_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4r.v4bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4r.v4bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4]], 0
 // CHECK64-NEXT:    [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4]], 1
 // CHECK64-NEXT:    [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4]], 2
@@ -776,19 +766,19 @@ bfloat16x8x3_t test_vld3q_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld4_dup_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD4_DUP_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4dup.v4bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD4_DUP_V:%.*]] = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4dup.v4bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD4_DUP_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_DUP_V]], 0
 // CHECK32-NEXT:    [[VLD4_DUP_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_DUP_V]], 1
 // CHECK32-NEXT:    [[VLD4_DUP_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_DUP_V]], 2
 // CHECK32-NEXT:    [[VLD4_DUP_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } [[VLD4_DUP_V]], 3
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_0_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_1_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_2_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_3_EXTRACT]] to <2 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <2 x i32>] poison, <2 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP2]], 2
-// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_2_INSERT]], <2 x i32> [[TMP3]], 3
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <4 x bfloat> [[VLD4_DUP_V_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <2 x i32>] poison, <2 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_0_INSERT]], <2 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_1_INSERT]], <2 x i32> [[TMP3]], 2
+// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <2 x i32>] [[DOTFCA_2_INSERT]], <2 x i32> [[TMP4]], 3
 // CHECK32-NEXT:    ret [4 x <2 x i32>] [[DOTFCA_3_INSERT]]
 //
 bfloat16x4x4_t test_vld4_dup_bf16(bfloat16_t const *ptr) {
@@ -797,7 +787,7 @@ bfloat16x4x4_t test_vld4_dup_bf16(bfloat16_t const *ptr) {
 
 // CHECK64-LABEL: @test_vld4q_dup_bf16(
 // CHECK64-NEXT:  entry:
-// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4r.v8bf16.p0(ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    [[VLD4:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4r.v8bf16.p0(ptr [[PTR:%.*]])
 // CHECK64-NEXT:    [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4]], 0
 // CHECK64-NEXT:    [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4]], 1
 // CHECK64-NEXT:    [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4]], 2
@@ -810,19 +800,19 @@ bfloat16x4x4_t test_vld4_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vld4q_dup_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    [[VLD4Q_DUP_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4dup.v8bf16.p0(ptr noundef [[PTR:%.*]], i32 2)
+// CHECK32-NEXT:    [[VLD4Q_DUP_V:%.*]] = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4dup.v8bf16.p0(ptr [[PTR:%.*]], i32 2)
 // CHECK32-NEXT:    [[VLD4Q_DUP_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_DUP_V]], 0
 // CHECK32-NEXT:    [[VLD4Q_DUP_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_DUP_V]], 1
 // CHECK32-NEXT:    [[VLD4Q_DUP_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_DUP_V]], 2
 // CHECK32-NEXT:    [[VLD4Q_DUP_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } [[VLD4Q_DUP_V]], 3
-// CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_0_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_1_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_2_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_3_EXTRACT]] to <4 x i32>
-// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <4 x i32>] poison, <4 x i32> [[TMP0]], 0
-// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP1]], 1
-// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP2]], 2
-// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_2_INSERT]], <4 x i32> [[TMP3]], 3
+// CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[TMP4:%.*]] = bitcast <8 x bfloat> [[VLD4Q_DUP_V_FCA_3_EXTRACT]] to <4 x i32>
+// CHECK32-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <4 x i32>] poison, <4 x i32> [[TMP1]], 0
+// CHECK32-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_0_INSERT]], <4 x i32> [[TMP2]], 1
+// CHECK32-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_1_INSERT]], <4 x i32> [[TMP3]], 2
+// CHECK32-NEXT:    [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <4 x i32>] [[DOTFCA_2_INSERT]], <4 x i32> [[TMP4]], 3
 // CHECK32-NEXT:    ret [4 x <4 x i32>] [[DOTFCA_3_INSERT]]
 //
 bfloat16x8x4_t test_vld4q_dup_bf16(bfloat16_t const *ptr) {
@@ -836,7 +826,7 @@ bfloat16x8x4_t test_vld4q_dup_bf16(bfloat16_t const *ptr) {
 //
 // CHECK32-LABEL: @test_vst1_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> noundef [[VAL:%.*]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[VAL:%.*]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst1_bf16(bfloat16_t *ptr, bfloat16x4_t val) {
@@ -850,7 +840,7 @@ void test_vst1_bf16(bfloat16_t *ptr, bfloat16x4_t val) {
 //
 // CHECK32-LABEL: @test_vst1q_bf16(
 // CHECK32-NEXT:  entry:
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> noundef [[VAL:%.*]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[VAL:%.*]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst1q_bf16(bfloat16_t *ptr, bfloat16x8_t val) {
@@ -881,7 +871,7 @@ void test_vst1q_lane_bf16(bfloat16_t *ptr, bfloat16x8_t val) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[VAL_COERCE]], 1
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x2.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x2.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst1_bf16_x2(
@@ -890,7 +880,7 @@ void test_vst1q_lane_bf16(bfloat16_t *ptr, bfloat16x8_t val) {
 // CHECK32-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[VAL_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x2.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]])
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x2.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]])
 // CHECK32-NEXT:    ret void
 //
 void test_vst1_bf16_x2(bfloat16_t *ptr, bfloat16x4x2_t val) {
@@ -901,7 +891,7 @@ void test_vst1_bf16_x2(bfloat16_t *ptr, bfloat16x4x2_t val) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VAL_COERCE]], 1
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x2.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x2.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst1q_bf16_x2(
@@ -910,7 +900,7 @@ void test_vst1_bf16_x2(bfloat16_t *ptr, bfloat16x4x2_t val) {
 // CHECK32-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[VAL_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x2.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x2.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
 // CHECK32-NEXT:    ret void
 //
 void test_vst1q_bf16_x2(bfloat16_t *ptr, bfloat16x8x2_t val) {
@@ -922,7 +912,7 @@ void test_vst1q_bf16_x2(bfloat16_t *ptr, bfloat16x8x2_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE]], 2
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x3.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x3.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst1_bf16_x3(
@@ -933,7 +923,7 @@ void test_vst1q_bf16_x2(bfloat16_t *ptr, bfloat16x8x2_t val) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x3.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]])
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x3.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]])
 // CHECK32-NEXT:    ret void
 //
 void test_vst1_bf16_x3(bfloat16_t *ptr, bfloat16x4x3_t val) {
@@ -945,7 +935,7 @@ void test_vst1_bf16_x3(bfloat16_t *ptr, bfloat16x4x3_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE]], 2
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x3.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x3.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst1q_bf16_x3(
@@ -956,7 +946,7 @@ void test_vst1_bf16_x3(bfloat16_t *ptr, bfloat16x4x3_t val) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x3.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]])
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x3.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]])
 // CHECK32-NEXT:    ret void
 //
 void test_vst1q_bf16_x3(bfloat16_t *ptr, bfloat16x8x3_t val) {
@@ -969,7 +959,7 @@ void test_vst1q_bf16_x3(bfloat16_t *ptr, bfloat16x8x3_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 2
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 3
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x4.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x4.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst1_bf16_x4(
@@ -982,7 +972,7 @@ void test_vst1q_bf16_x3(bfloat16_t *ptr, bfloat16x8x3_t val) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_3_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x4.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]])
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x4.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]])
 // CHECK32-NEXT:    ret void
 //
 void test_vst1_bf16_x4(bfloat16_t *ptr, bfloat16x4x4_t val) {
@@ -995,7 +985,7 @@ void test_vst1_bf16_x4(bfloat16_t *ptr, bfloat16x4x4_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 2
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 3
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x4.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st1x4.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst1q_bf16_x4(
@@ -1008,7 +998,7 @@ void test_vst1_bf16_x4(bfloat16_t *ptr, bfloat16x4x4_t val) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_3_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x4.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]])
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst1x4.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]])
 // CHECK32-NEXT:    ret void
 //
 void test_vst1q_bf16_x4(bfloat16_t *ptr, bfloat16x8x4_t val) {
@@ -1019,7 +1009,7 @@ void test_vst1q_bf16_x4(bfloat16_t *ptr, bfloat16x8x4_t val) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[VAL_COERCE]], 1
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst2_bf16(
@@ -1028,7 +1018,7 @@ void test_vst1q_bf16_x4(bfloat16_t *ptr, bfloat16x8x4_t val) {
 // CHECK32-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[VAL_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst2_bf16(bfloat16_t *ptr, bfloat16x4x2_t val) {
@@ -1039,7 +1029,7 @@ void test_vst2_bf16(bfloat16_t *ptr, bfloat16x4x2_t val) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VAL_COERCE]], 1
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst2q_bf16(
@@ -1048,7 +1038,7 @@ void test_vst2_bf16(bfloat16_t *ptr, bfloat16x4x2_t val) {
 // CHECK32-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[VAL_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst2q_bf16(bfloat16_t *ptr, bfloat16x8x2_t val) {
@@ -1059,7 +1049,7 @@ void test_vst2q_bf16(bfloat16_t *ptr, bfloat16x8x2_t val) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x bfloat>] [[VAL_COERCE]], 1
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2lane.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], i64 1, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2lane.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], i64 1, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst2_lane_bf16(
@@ -1068,7 +1058,7 @@ void test_vst2q_bf16(bfloat16_t *ptr, bfloat16x8x2_t val) {
 // CHECK32-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[VAL_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2lane.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], i32 1, i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2lane.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], i32 1, i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst2_lane_bf16(bfloat16_t *ptr, bfloat16x4x2_t val) {
@@ -1079,7 +1069,7 @@ void test_vst2_lane_bf16(bfloat16_t *ptr, bfloat16x4x2_t val) {
 // CHECK64-NEXT:  entry:
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VAL_COERCE]], 1
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2lane.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], i64 7, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st2lane.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst2q_lane_bf16(
@@ -1088,7 +1078,7 @@ void test_vst2_lane_bf16(bfloat16_t *ptr, bfloat16x4x2_t val) {
 // CHECK32-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[VAL_COERCE]], 1
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2lane.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], i32 7, i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst2lane.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], i32 7, i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst2q_lane_bf16(bfloat16_t *ptr, bfloat16x8x2_t val) {
@@ -1100,7 +1090,7 @@ void test_vst2q_lane_bf16(bfloat16_t *ptr, bfloat16x8x2_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE]], 2
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst3_bf16(
@@ -1111,7 +1101,7 @@ void test_vst2q_lane_bf16(bfloat16_t *ptr, bfloat16x8x2_t val) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst3_bf16(bfloat16_t *ptr, bfloat16x4x3_t val) {
@@ -1123,7 +1113,7 @@ void test_vst3_bf16(bfloat16_t *ptr, bfloat16x4x3_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE]], 2
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst3q_bf16(
@@ -1134,7 +1124,7 @@ void test_vst3_bf16(bfloat16_t *ptr, bfloat16x4x3_t val) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst3q_bf16(bfloat16_t *ptr, bfloat16x8x3_t val) {
@@ -1146,7 +1136,7 @@ void test_vst3q_bf16(bfloat16_t *ptr, bfloat16x8x3_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x bfloat>] [[VAL_COERCE]], 2
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3lane.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], i64 1, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3lane.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], i64 1, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst3_lane_bf16(
@@ -1157,7 +1147,7 @@ void test_vst3q_bf16(bfloat16_t *ptr, bfloat16x8x3_t val) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3lane.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], i32 1, i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3lane.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], i32 1, i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst3_lane_bf16(bfloat16_t *ptr, bfloat16x4x3_t val) {
@@ -1169,7 +1159,7 @@ void test_vst3_lane_bf16(bfloat16_t *ptr, bfloat16x4x3_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE:%.*]], 0
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x bfloat>] [[VAL_COERCE]], 2
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3lane.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], i64 7, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st3lane.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst3q_lane_bf16(
@@ -1180,7 +1170,7 @@ void test_vst3_lane_bf16(bfloat16_t *ptr, bfloat16x4x3_t val) {
 // CHECK32-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3lane.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], i32 7, i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst3lane.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], i32 7, i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst3q_lane_bf16(bfloat16_t *ptr, bfloat16x8x3_t val) {
@@ -1193,7 +1183,7 @@ void test_vst3q_lane_bf16(bfloat16_t *ptr, bfloat16x8x3_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 2
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 3
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst4_bf16(
@@ -1206,7 +1196,7 @@ void test_vst3q_lane_bf16(bfloat16_t *ptr, bfloat16x8x3_t val) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_3_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst4_bf16(bfloat16_t *ptr, bfloat16x4x4_t val) {
@@ -1219,7 +1209,7 @@ void test_vst4_bf16(bfloat16_t *ptr, bfloat16x4x4_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 2
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 3
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst4q_bf16(
@@ -1232,7 +1222,7 @@ void test_vst4_bf16(bfloat16_t *ptr, bfloat16x4x4_t val) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_3_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]], i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]], i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst4q_bf16(bfloat16_t *ptr, bfloat16x8x4_t val) {
@@ -1245,7 +1235,7 @@ void test_vst4q_bf16(bfloat16_t *ptr, bfloat16x8x4_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 2
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x bfloat>] [[VAL_COERCE]], 3
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4lane.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], i64 1, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4lane.v4bf16.p0(<4 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <4 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], i64 1, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst4_lane_bf16(
@@ -1258,7 +1248,7 @@ void test_vst4q_bf16(bfloat16_t *ptr, bfloat16x8x4_t val) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <4 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[VAL_COERCE_FCA_3_EXTRACT]] to <4 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4lane.p0.v4bf16(ptr noundef [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]], i32 1, i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4lane.p0.v4bf16(ptr [[PTR:%.*]], <4 x bfloat> [[TMP0]], <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]], i32 1, i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst4_lane_bf16(bfloat16_t *ptr, bfloat16x4x4_t val) {
@@ -1271,7 +1261,7 @@ void test_vst4_lane_bf16(bfloat16_t *ptr, bfloat16x4x4_t val) {
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 1
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 2
 // CHECK64-NEXT:    [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x bfloat>] [[VAL_COERCE]], 3
-// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4lane.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], i64 7, ptr noundef [[PTR:%.*]])
+// CHECK64-NEXT:    tail call void @llvm.aarch64.neon.st4lane.v8bf16.p0(<8 x bfloat> [[VAL_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_1_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_2_EXTRACT]], <8 x bfloat> [[VAL_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[PTR:%.*]])
 // CHECK64-NEXT:    ret void
 //
 // CHECK32-LABEL: @test_vst4q_lane_bf16(
@@ -1284,7 +1274,7 @@ void test_vst4_lane_bf16(bfloat16_t *ptr, bfloat16x4x4_t val) {
 // CHECK32-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x bfloat>
 // CHECK32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[VAL_COERCE_FCA_3_EXTRACT]] to <8 x bfloat>
-// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4lane.p0.v8bf16(ptr noundef [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]], i32 7, i32 2)
+// CHECK32-NEXT:    tail call void @llvm.arm.neon.vst4lane.p0.v8bf16(ptr [[PTR:%.*]], <8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP3]], i32 7, i32 2)
 // CHECK32-NEXT:    ret void
 //
 void test_vst4q_lane_bf16(bfloat16_t *ptr, bfloat16x8x4_t val) {

diff  --git a/clang/test/CodeGen/aarch64-ls64-inline-asm.c b/clang/test/CodeGen/aarch64-ls64-inline-asm.c
index e59f08d694ecb..ae07b876f50b2 100644
--- a/clang/test/CodeGen/aarch64-ls64-inline-asm.c
+++ b/clang/test/CodeGen/aarch64-ls64-inline-asm.c
@@ -5,7 +5,7 @@ struct foo { unsigned long long x[8]; };
 
 // CHECK-LABEL: @load(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr noundef [[ADDR:%.*]]) #[[ATTR1:[0-9]+]], !srcloc !2
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR:%.*]]) #[[ATTR1:[0-9]+]], !srcloc !2
 // CHECK-NEXT:    store i512 [[TMP0]], ptr [[OUTPUT:%.*]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -16,8 +16,8 @@ void load(struct foo *output, void *addr)
 
 // CHECK-LABEL: @store(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i512, ptr [[INPUT:%.*]], align 8
-// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr noundef [[ADDR:%.*]]) #[[ATTR1]], !srcloc !3
+// CHECK-NEXT:    [[TMP1:%.*]] = load i512, ptr [[INPUT:%.*]], align 8
+// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP1]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc !3
 // CHECK-NEXT:    ret void
 //
 void store(const struct foo *input, void *addr)
@@ -72,7 +72,7 @@ void store(const struct foo *input, void *addr)
 // CHECK-NEXT:    [[S_SROA_0_0_INSERT_EXT:%.*]] = zext i64 [[CONV]] to i512
 // CHECK-NEXT:    [[S_SROA_0_0_INSERT_MASK:%.*]] = or i512 [[S_SROA_4_0_INSERT_MASK]], [[S_SROA_4_0_INSERT_SHIFT]]
 // CHECK-NEXT:    [[S_SROA_0_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_0_0_INSERT_MASK]], [[S_SROA_0_0_INSERT_EXT]]
-// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr noundef [[ADDR:%.*]]) #[[ATTR1]], !srcloc !8
+// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc !8
 // CHECK-NEXT:    ret void
 //
 void store2(int *in, void *addr)

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c
index eb113c3851362..e02d1a77ec6f5 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1.c
@@ -114,13 +114,13 @@ svfloat64_t test_svldnt1_gather_u64base_f64(svbool_t pg, svuint64_t bases) {
 // CHECK-LABEL: @test_svldnt1_gather_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_s64offset_s64u10__SVBool_tPKlu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svint64_t test_svldnt1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets) {
@@ -130,13 +130,13 @@ svint64_t test_svldnt1_gather_s64offset_s64(svbool_t pg, const int64_t *base, sv
 // CHECK-LABEL: @test_svldnt1_gather_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_s64offset_u64u10__SVBool_tPKmu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svuint64_t test_svldnt1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets) {
@@ -146,13 +146,13 @@ svuint64_t test_svldnt1_gather_s64offset_u64(svbool_t pg, const uint64_t *base,
 // CHECK-LABEL: @test_svldnt1_gather_s64offset_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_s64offset_f64u10__SVBool_tPKdu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 svfloat64_t test_svldnt1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets) {
@@ -162,13 +162,13 @@ svfloat64_t test_svldnt1_gather_s64offset_f64(svbool_t pg, const float64_t *base
 // CHECK-LABEL: @test_svldnt1_gather_u32offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_u32offset_s32u10__SVBool_tPKiu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 svint32_t test_svldnt1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets) {
@@ -178,13 +178,13 @@ svint32_t test_svldnt1_gather_u32offset_s32(svbool_t pg, const int32_t *base, sv
 // CHECK-LABEL: @test_svldnt1_gather_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_u64offset_s64u10__SVBool_tPKlu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svint64_t test_svldnt1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets) {
@@ -194,13 +194,13 @@ svint64_t test_svldnt1_gather_u64offset_s64(svbool_t pg, const int64_t *base, sv
 // CHECK-LABEL: @test_svldnt1_gather_u32offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_u32offset_u32u10__SVBool_tPKju12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 svuint32_t test_svldnt1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets) {
@@ -210,13 +210,13 @@ svuint32_t test_svldnt1_gather_u32offset_u32(svbool_t pg, const uint32_t *base,
 // CHECK-LABEL: @test_svldnt1_gather_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_u64offset_u64u10__SVBool_tPKmu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svuint64_t test_svldnt1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets) {
@@ -226,13 +226,13 @@ svuint64_t test_svldnt1_gather_u64offset_u64(svbool_t pg, const uint64_t *base,
 // CHECK-LABEL: @test_svldnt1_gather_u32offset_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_u32offset_f32u10__SVBool_tPKfu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 svfloat32_t test_svldnt1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets) {
@@ -242,13 +242,13 @@ svfloat32_t test_svldnt1_gather_u32offset_f32(svbool_t pg, const float32_t *base
 // CHECK-LABEL: @test_svldnt1_gather_u64offset_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z33test_svldnt1_gather_u64offset_f64u10__SVBool_tPKdu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 svfloat64_t test_svldnt1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets) {
@@ -258,13 +258,13 @@ svfloat64_t test_svldnt1_gather_u64offset_f64(svbool_t pg, const float64_t *base
 // CHECK-LABEL: @test_svldnt1_gather_u32base_offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z38test_svldnt1_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 svint32_t test_svldnt1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) {
@@ -274,13 +274,13 @@ svint32_t test_svldnt1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases,
 // CHECK-LABEL: @test_svldnt1_gather_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z38test_svldnt1_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svint64_t test_svldnt1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) {
@@ -290,13 +290,13 @@ svint64_t test_svldnt1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases,
 // CHECK-LABEL: @test_svldnt1_gather_u32base_offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z38test_svldnt1_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 svuint32_t test_svldnt1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) {
@@ -306,13 +306,13 @@ svuint32_t test_svldnt1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
 // CHECK-LABEL: @test_svldnt1_gather_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z38test_svldnt1_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svuint64_t test_svldnt1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
@@ -322,13 +322,13 @@ svuint64_t test_svldnt1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases,
 // CHECK-LABEL: @test_svldnt1_gather_u32base_offset_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z38test_svldnt1_gather_u32base_offset_f32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 svfloat32_t test_svldnt1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset) {
@@ -338,13 +338,13 @@ svfloat32_t test_svldnt1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases
 // CHECK-LABEL: @test_svldnt1_gather_u64base_offset_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z38test_svldnt1_gather_u64base_offset_f64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 svfloat64_t test_svldnt1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset) {
@@ -354,13 +354,13 @@ svfloat64_t test_svldnt1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases
 // CHECK-LABEL: @test_svldnt1_gather_s64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z32test_svldnt1_gather_s64index_s64u10__SVBool_tPKlu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svint64_t test_svldnt1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices) {
@@ -370,13 +370,13 @@ svint64_t test_svldnt1_gather_s64index_s64(svbool_t pg, const int64_t *base, svi
 // CHECK-LABEL: @test_svldnt1_gather_s64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z32test_svldnt1_gather_s64index_u64u10__SVBool_tPKmu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svuint64_t test_svldnt1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices) {
@@ -386,13 +386,13 @@ svuint64_t test_svldnt1_gather_s64index_u64(svbool_t pg, const uint64_t *base, s
 // CHECK-LABEL: @test_svldnt1_gather_s64index_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z32test_svldnt1_gather_s64index_f64u10__SVBool_tPKdu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 svfloat64_t test_svldnt1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices) {
@@ -402,13 +402,13 @@ svfloat64_t test_svldnt1_gather_s64index_f64(svbool_t pg, const float64_t *base,
 // CHECK-LABEL: @test_svldnt1_gather_u64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z32test_svldnt1_gather_u64index_s64u10__SVBool_tPKlu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svint64_t test_svldnt1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices) {
@@ -418,13 +418,13 @@ svint64_t test_svldnt1_gather_u64index_s64(svbool_t pg, const int64_t *base, svu
 // CHECK-LABEL: @test_svldnt1_gather_u64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z32test_svldnt1_gather_u64index_u64u10__SVBool_tPKmu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 svuint64_t test_svldnt1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices) {
@@ -434,13 +434,13 @@ svuint64_t test_svldnt1_gather_u64index_u64(svbool_t pg, const uint64_t *base, s
 // CHECK-LABEL: @test_svldnt1_gather_u64index_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z32test_svldnt1_gather_u64index_f64u10__SVBool_tPKdu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr noundef readonly [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.gather.index.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 svfloat64_t test_svldnt1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices) {

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c
index 54f4e8bdda7fa..b64c7999ba7a2 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sb.c
@@ -90,14 +90,14 @@ svuint64_t test_svldnt1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
 // CHECK-LABEL: @test_svldnt1sb_gather_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sb_gather_s64offset_s64u10__SVBool_tPKau11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -108,14 +108,14 @@ svint64_t test_svldnt1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, s
 // CHECK-LABEL: @test_svldnt1sb_gather_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sb_gather_s64offset_u64u10__SVBool_tPKau11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -126,14 +126,14 @@ svuint64_t test_svldnt1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base,
 // CHECK-LABEL: @test_svldnt1sb_gather_u32offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sb_gather_u32offset_s32u10__SVBool_tPKau12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -144,14 +144,14 @@ svint32_t test_svldnt1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, s
 // CHECK-LABEL: @test_svldnt1sb_gather_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sb_gather_u64offset_s64u10__SVBool_tPKau12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -162,14 +162,14 @@ svint64_t test_svldnt1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, s
 // CHECK-LABEL: @test_svldnt1sb_gather_u32offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sb_gather_u32offset_u32u10__SVBool_tPKau12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -180,14 +180,14 @@ svuint32_t test_svldnt1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base,
 // CHECK-LABEL: @test_svldnt1sb_gather_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sb_gather_u64offset_u64u10__SVBool_tPKau12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -198,14 +198,14 @@ svuint64_t test_svldnt1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base,
 // CHECK-LABEL: @test_svldnt1sb_gather_u32base_offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sb_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -216,14 +216,14 @@ svint32_t test_svldnt1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
 // CHECK-LABEL: @test_svldnt1sb_gather_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sb_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -234,14 +234,14 @@ svint64_t test_svldnt1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
 // CHECK-LABEL: @test_svldnt1sb_gather_u32base_offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sb_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -252,14 +252,14 @@ svuint32_t test_svldnt1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
 // CHECK-LABEL: @test_svldnt1sb_gather_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sb_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c
index 0383b9e879cb0..f823311efbb65 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sh.c
@@ -90,14 +90,14 @@ svuint64_t test_svldnt1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
 // CHECK-LABEL: @test_svldnt1sh_gather_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sh_gather_s64offset_s64u10__SVBool_tPKsu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -108,14 +108,14 @@ svint64_t test_svldnt1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base,
 // CHECK-LABEL: @test_svldnt1sh_gather_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sh_gather_s64offset_u64u10__SVBool_tPKsu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -126,14 +126,14 @@ svuint64_t test_svldnt1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base,
 // CHECK-LABEL: @test_svldnt1sh_gather_u32offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sh_gather_u32offset_s32u10__SVBool_tPKsu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -144,14 +144,14 @@ svint32_t test_svldnt1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base,
 // CHECK-LABEL: @test_svldnt1sh_gather_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sh_gather_u64offset_s64u10__SVBool_tPKsu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -162,14 +162,14 @@ svint64_t test_svldnt1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base,
 // CHECK-LABEL: @test_svldnt1sh_gather_u32offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sh_gather_u32offset_u32u10__SVBool_tPKsu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -180,14 +180,14 @@ svuint32_t test_svldnt1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base,
 // CHECK-LABEL: @test_svldnt1sh_gather_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sh_gather_u64offset_u64u10__SVBool_tPKsu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -198,14 +198,14 @@ svuint64_t test_svldnt1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base,
 // CHECK-LABEL: @test_svldnt1sh_gather_u32base_offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -216,14 +216,14 @@ svint32_t test_svldnt1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
 // CHECK-LABEL: @test_svldnt1sh_gather_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -234,14 +234,14 @@ svint64_t test_svldnt1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
 // CHECK-LABEL: @test_svldnt1sh_gather_u32base_offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -252,14 +252,14 @@ svuint32_t test_svldnt1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
 // CHECK-LABEL: @test_svldnt1sh_gather_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -270,14 +270,14 @@ svuint64_t test_svldnt1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
 // CHECK-LABEL: @test_svldnt1sh_gather_s64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sh_gather_s64index_s64u10__SVBool_tPKsu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -288,14 +288,14 @@ svint64_t test_svldnt1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, s
 // CHECK-LABEL: @test_svldnt1sh_gather_s64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sh_gather_s64index_u64u10__SVBool_tPKsu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -306,14 +306,14 @@ svuint64_t test_svldnt1sh_gather_s64index_u64(svbool_t pg, const int16_t *base,
 // CHECK-LABEL: @test_svldnt1sh_gather_u64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sh_gather_u64index_s64u10__SVBool_tPKsu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -324,14 +324,14 @@ svint64_t test_svldnt1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, s
 // CHECK-LABEL: @test_svldnt1sh_gather_u64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sh_gather_u64index_u64u10__SVBool_tPKsu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c
index c772d7a07d546..1b7825ad97459 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1sw.c
@@ -54,14 +54,14 @@ svuint64_t test_svldnt1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
 // CHECK-LABEL: @test_svldnt1sw_gather_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sw_gather_s64offset_s64u10__SVBool_tPKiu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -72,14 +72,14 @@ svint64_t test_svldnt1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base,
 // CHECK-LABEL: @test_svldnt1sw_gather_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sw_gather_s64offset_u64u10__SVBool_tPKiu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -90,14 +90,14 @@ svuint64_t test_svldnt1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base,
 // CHECK-LABEL: @test_svldnt1sw_gather_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sw_gather_u64offset_s64u10__SVBool_tPKiu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -108,14 +108,14 @@ svint64_t test_svldnt1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base,
 // CHECK-LABEL: @test_svldnt1sw_gather_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1sw_gather_u64offset_u64u10__SVBool_tPKiu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -126,14 +126,14 @@ svuint64_t test_svldnt1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base,
 // CHECK-LABEL: @test_svldnt1sw_gather_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -144,14 +144,14 @@ svint64_t test_svldnt1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
 // CHECK-LABEL: @test_svldnt1sw_gather_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1sw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -162,14 +162,14 @@ svuint64_t test_svldnt1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
 // CHECK-LABEL: @test_svldnt1sw_gather_s64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sw_gather_s64index_s64u10__SVBool_tPKiu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -180,14 +180,14 @@ svint64_t test_svldnt1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, s
 // CHECK-LABEL: @test_svldnt1sw_gather_s64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sw_gather_s64index_u64u10__SVBool_tPKiu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -198,14 +198,14 @@ svuint64_t test_svldnt1sw_gather_s64index_u64(svbool_t pg, const int32_t *base,
 // CHECK-LABEL: @test_svldnt1sw_gather_u64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sw_gather_u64index_s64u10__SVBool_tPKiu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -216,14 +216,14 @@ svint64_t test_svldnt1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, s
 // CHECK-LABEL: @test_svldnt1sw_gather_u64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1sw_gather_u64index_u64u10__SVBool_tPKiu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c
index 7a349a42f221a..5b800a447fd39 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1ub.c
@@ -90,14 +90,14 @@ svuint64_t test_svldnt1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
 // CHECK-LABEL: @test_svldnt1ub_gather_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1ub_gather_s64offset_s64u10__SVBool_tPKhu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -108,14 +108,14 @@ svint64_t test_svldnt1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base,
 // CHECK-LABEL: @test_svldnt1ub_gather_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1ub_gather_s64offset_u64u10__SVBool_tPKhu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -126,14 +126,14 @@ svuint64_t test_svldnt1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base,
 // CHECK-LABEL: @test_svldnt1ub_gather_u32offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1ub_gather_u32offset_s32u10__SVBool_tPKhu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -144,14 +144,14 @@ svint32_t test_svldnt1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base,
 // CHECK-LABEL: @test_svldnt1ub_gather_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1ub_gather_u64offset_s64u10__SVBool_tPKhu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -162,14 +162,14 @@ svint64_t test_svldnt1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base,
 // CHECK-LABEL: @test_svldnt1ub_gather_u32offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1ub_gather_u32offset_u32u10__SVBool_tPKhu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -180,14 +180,14 @@ svuint32_t test_svldnt1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base,
 // CHECK-LABEL: @test_svldnt1ub_gather_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1ub_gather_u64offset_u64u10__SVBool_tPKhu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.nxv2i8(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -198,14 +198,14 @@ svuint64_t test_svldnt1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base,
 // CHECK-LABEL: @test_svldnt1ub_gather_u32base_offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1ub_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -216,14 +216,14 @@ svint32_t test_svldnt1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
 // CHECK-LABEL: @test_svldnt1ub_gather_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1ub_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -234,14 +234,14 @@ svint64_t test_svldnt1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
 // CHECK-LABEL: @test_svldnt1ub_gather_u32base_offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1ub_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -252,14 +252,14 @@ svuint32_t test_svldnt1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
 // CHECK-LABEL: @test_svldnt1ub_gather_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1ub_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i8> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c
index 449a7a58b3985..c50febdbc5499 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uh.c
@@ -90,14 +90,14 @@ svuint64_t test_svldnt1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
 // CHECK-LABEL: @test_svldnt1uh_gather_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uh_gather_s64offset_s64u10__SVBool_tPKtu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -108,14 +108,14 @@ svint64_t test_svldnt1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base,
 // CHECK-LABEL: @test_svldnt1uh_gather_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uh_gather_s64offset_u64u10__SVBool_tPKtu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -126,14 +126,14 @@ svuint64_t test_svldnt1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base
 // CHECK-LABEL: @test_svldnt1uh_gather_u32offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uh_gather_u32offset_s32u10__SVBool_tPKtu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -144,14 +144,14 @@ svint32_t test_svldnt1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base,
 // CHECK-LABEL: @test_svldnt1uh_gather_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uh_gather_u64offset_s64u10__SVBool_tPKtu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -162,14 +162,14 @@ svint64_t test_svldnt1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base,
 // CHECK-LABEL: @test_svldnt1uh_gather_u32offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uh_gather_u32offset_u32u10__SVBool_tPKtu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -180,14 +180,14 @@ svuint32_t test_svldnt1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base
 // CHECK-LABEL: @test_svldnt1uh_gather_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uh_gather_u64offset_u64u10__SVBool_tPKtu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -198,14 +198,14 @@ svuint64_t test_svldnt1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base
 // CHECK-LABEL: @test_svldnt1uh_gather_u32base_offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1uh_gather_u32base_offset_s32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -216,14 +216,14 @@ svint32_t test_svldnt1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases
 // CHECK-LABEL: @test_svldnt1uh_gather_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1uh_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -234,14 +234,14 @@ svint64_t test_svldnt1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
 // CHECK-LABEL: @test_svldnt1uh_gather_u32base_offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1uh_gather_u32base_offset_u32u10__SVBool_tu12__SVUint32_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
@@ -252,14 +252,14 @@ svuint32_t test_svldnt1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t base
 // CHECK-LABEL: @test_svldnt1uh_gather_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1uh_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -270,14 +270,14 @@ svuint64_t test_svldnt1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
 // CHECK-LABEL: @test_svldnt1uh_gather_s64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uh_gather_s64index_s64u10__SVBool_tPKtu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -288,14 +288,14 @@ svint64_t test_svldnt1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base,
 // CHECK-LABEL: @test_svldnt1uh_gather_s64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uh_gather_s64index_u64u10__SVBool_tPKtu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -306,14 +306,14 @@ svuint64_t test_svldnt1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base,
 // CHECK-LABEL: @test_svldnt1uh_gather_u64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uh_gather_u64index_s64u10__SVBool_tPKtu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -324,14 +324,14 @@ svint64_t test_svldnt1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base,
 // CHECK-LABEL: @test_svldnt1uh_gather_u64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uh_gather_u64index_u64u10__SVBool_tPKtu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i16> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i16(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c
index 4c512de474179..1030f125e191a 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_ldnt1uw.c
@@ -54,14 +54,14 @@ svuint64_t test_svldnt1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases) {
 // CHECK-LABEL: @test_svldnt1uw_gather_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uw_gather_s64offset_s64u10__SVBool_tPKju11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -72,14 +72,14 @@ svint64_t test_svldnt1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base,
 // CHECK-LABEL: @test_svldnt1uw_gather_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uw_gather_s64offset_u64u10__SVBool_tPKju11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -90,14 +90,14 @@ svuint64_t test_svldnt1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base
 // CHECK-LABEL: @test_svldnt1uw_gather_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uw_gather_u64offset_s64u10__SVBool_tPKju12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -108,14 +108,14 @@ svint64_t test_svldnt1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base,
 // CHECK-LABEL: @test_svldnt1uw_gather_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z35test_svldnt1uw_gather_u64offset_u64u10__SVBool_tPKju12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -126,14 +126,14 @@ svuint64_t test_svldnt1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base
 // CHECK-LABEL: @test_svldnt1uw_gather_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1uw_gather_u64base_offset_s64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -144,14 +144,14 @@ svint64_t test_svldnt1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases
 // CHECK-LABEL: @test_svldnt1uw_gather_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z40test_svldnt1uw_gather_u64base_offset_u64u10__SVBool_tu12__SVUint64_tl(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -162,14 +162,14 @@ svuint64_t test_svldnt1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t base
 // CHECK-LABEL: @test_svldnt1uw_gather_s64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uw_gather_s64index_s64u10__SVBool_tPKju11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -180,14 +180,14 @@ svint64_t test_svldnt1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base,
 // CHECK-LABEL: @test_svldnt1uw_gather_s64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uw_gather_s64index_u64u10__SVBool_tPKju11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -198,14 +198,14 @@ svuint64_t test_svldnt1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base,
 // CHECK-LABEL: @test_svldnt1uw_gather_u64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uw_gather_u64index_s64u10__SVBool_tPKju12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
@@ -216,14 +216,14 @@ svint64_t test_svldnt1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base,
 // CHECK-LABEL: @test_svldnt1uw_gather_u64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z34test_svldnt1uw_gather_u64index_u64u10__SVBool_tPKju12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i32> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i32(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c
index c24250beb9bb4..d866c8ac0e4f5 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1.c
@@ -114,13 +114,13 @@ void test_svstnt1_scatter_u64base_f64(svbool_t pg, svuint64_t bases, svfloat64_t
 // CHECK-LABEL: @test_svstnt1_scatter_s64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_s64offset_s64u10__SVBool_tPlu11__SVInt64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_s64offset_s64(svbool_t pg, int64_t *base, svint64_t offsets, svint64_t data) {
@@ -130,13 +130,13 @@ void test_svstnt1_scatter_s64offset_s64(svbool_t pg, int64_t *base, svint64_t of
 // CHECK-LABEL: @test_svstnt1_scatter_s64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_s64offset_u64u10__SVBool_tPmu11__SVInt64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_s64offset_u64(svbool_t pg, uint64_t *base, svint64_t offsets, svuint64_t data) {
@@ -146,13 +146,13 @@ void test_svstnt1_scatter_s64offset_u64(svbool_t pg, uint64_t *base, svint64_t o
 // CHECK-LABEL: @test_svstnt1_scatter_s64offset_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_s64offset_f64u10__SVBool_tPdu11__SVInt64_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t offsets, svfloat64_t data) {
@@ -162,13 +162,13 @@ void test_svstnt1_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t
 // CHECK-LABEL: @test_svstnt1_scatter_u32offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_u32offset_s32u10__SVBool_tPiu12__SVUint32_tu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u32offset_s32(svbool_t pg, int32_t *base, svuint32_t offsets, svint32_t data) {
@@ -178,13 +178,13 @@ void test_svstnt1_scatter_u32offset_s32(svbool_t pg, int32_t *base, svuint32_t o
 // CHECK-LABEL: @test_svstnt1_scatter_u64offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_u64offset_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t offsets, svint64_t data) {
@@ -194,13 +194,13 @@ void test_svstnt1_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t o
 // CHECK-LABEL: @test_svstnt1_scatter_u32offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_u32offset_u32u10__SVBool_tPju12__SVUint32_tu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u32offset_u32(svbool_t pg, uint32_t *base, svuint32_t offsets, svuint32_t data) {
@@ -210,13 +210,13 @@ void test_svstnt1_scatter_u32offset_u32(svbool_t pg, uint32_t *base, svuint32_t
 // CHECK-LABEL: @test_svstnt1_scatter_u64offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_u64offset_u64u10__SVBool_tPmu12__SVUint64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t offsets, svuint64_t data) {
@@ -226,13 +226,13 @@ void test_svstnt1_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t
 // CHECK-LABEL: @test_svstnt1_scatter_u32offset_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_u32offset_f32u10__SVBool_tPfu12__SVUint32_tu13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u32offset_f32(svbool_t pg, float32_t *base, svuint32_t offsets, svfloat32_t data) {
@@ -242,13 +242,13 @@ void test_svstnt1_scatter_u32offset_f32(svbool_t pg, float32_t *base, svuint32_t
 // CHECK-LABEL: @test_svstnt1_scatter_u64offset_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1_scatter_u64offset_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t offsets, svfloat64_t data) {
@@ -258,13 +258,13 @@ void test_svstnt1_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t
 // CHECK-LABEL: @test_svstnt1_scatter_u32base_offset_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z39test_svstnt1_scatter_u32base_offset_s32u10__SVBool_tu12__SVUint32_tlu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) {
@@ -274,13 +274,13 @@ void test_svstnt1_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int6
 // CHECK-LABEL: @test_svstnt1_scatter_u64base_offset_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z39test_svstnt1_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
@@ -290,13 +290,13 @@ void test_svstnt1_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int6
 // CHECK-LABEL: @test_svstnt1_scatter_u32base_offset_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z39test_svstnt1_scatter_u32base_offset_u32u10__SVBool_tu12__SVUint32_tlu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) {
@@ -306,13 +306,13 @@ void test_svstnt1_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int6
 // CHECK-LABEL: @test_svstnt1_scatter_u64base_offset_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z39test_svstnt1_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {
@@ -322,13 +322,13 @@ void test_svstnt1_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int6
 // CHECK-LABEL: @test_svstnt1_scatter_u32base_offset_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z39test_svstnt1_scatter_u32base_offset_f32u10__SVBool_tu12__SVUint32_tlu13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset, svfloat32_t data) {
@@ -338,13 +338,13 @@ void test_svstnt1_scatter_u32base_offset_f32(svbool_t pg, svuint32_t bases, int6
 // CHECK-LABEL: @test_svstnt1_scatter_u64base_offset_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z39test_svstnt1_scatter_u64base_offset_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset, svfloat64_t data) {
@@ -354,13 +354,13 @@ void test_svstnt1_scatter_u64base_offset_f64(svbool_t pg, svuint64_t bases, int6
 // CHECK-LABEL: @test_svstnt1_scatter_s64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z33test_svstnt1_scatter_s64index_s64u10__SVBool_tPlu11__SVInt64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_s64index_s64(svbool_t pg, int64_t *base, svint64_t indices, svint64_t data) {
@@ -370,13 +370,13 @@ void test_svstnt1_scatter_s64index_s64(svbool_t pg, int64_t *base, svint64_t ind
 // CHECK-LABEL: @test_svstnt1_scatter_s64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z33test_svstnt1_scatter_s64index_u64u10__SVBool_tPmu11__SVInt64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_s64index_u64(svbool_t pg, uint64_t *base, svint64_t indices, svuint64_t data) {
@@ -386,13 +386,13 @@ void test_svstnt1_scatter_s64index_u64(svbool_t pg, uint64_t *base, svint64_t in
 // CHECK-LABEL: @test_svstnt1_scatter_s64index_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z33test_svstnt1_scatter_s64index_f64u10__SVBool_tPdu11__SVInt64_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_s64index_f64(svbool_t pg, float64_t *base, svint64_t indices, svfloat64_t data) {
@@ -402,13 +402,13 @@ void test_svstnt1_scatter_s64index_f64(svbool_t pg, float64_t *base, svint64_t i
 // CHECK-LABEL: @test_svstnt1_scatter_u64index_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z33test_svstnt1_scatter_u64index_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t indices, svint64_t data) {
@@ -418,13 +418,13 @@ void test_svstnt1_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t in
 // CHECK-LABEL: @test_svstnt1_scatter_u64index_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z33test_svstnt1_scatter_u64index_u64u10__SVBool_tPmu12__SVUint64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t indices, svuint64_t data) {
@@ -434,13 +434,13 @@ void test_svstnt1_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t i
 // CHECK-LABEL: @test_svstnt1_scatter_u64index_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z33test_svstnt1_scatter_u64index_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t indices, svfloat64_t data) {

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c
index 25a8814c15e1f..d9e0d79b23c04 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1b.c
@@ -91,14 +91,14 @@ void test_svstnt1b_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1b_scatter_s64offset_s64u10__SVBool_tPau11__SVInt64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_s64offset_s64(svbool_t pg, int8_t *base, svint64_t offsets, svint64_t data) {
@@ -109,14 +109,14 @@ void test_svstnt1b_scatter_s64offset_s64(svbool_t pg, int8_t *base, svint64_t of
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1b_scatter_s64offset_u64u10__SVBool_tPhu11__SVInt64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_s64offset_u64(svbool_t pg, uint8_t *base, svint64_t offsets, svuint64_t data) {
@@ -127,14 +127,14 @@ void test_svstnt1b_scatter_s64offset_u64(svbool_t pg, uint8_t *base, svint64_t o
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1b_scatter_u32offset_s32u10__SVBool_tPau12__SVUint32_tu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u32offset_s32(svbool_t pg, int8_t *base, svuint32_t offsets, svint32_t data) {
@@ -145,14 +145,14 @@ void test_svstnt1b_scatter_u32offset_s32(svbool_t pg, int8_t *base, svuint32_t o
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1b_scatter_u64offset_s64u10__SVBool_tPau12__SVUint64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u64offset_s64(svbool_t pg, int8_t *base, svuint64_t offsets, svint64_t data) {
@@ -163,14 +163,14 @@ void test_svstnt1b_scatter_u64offset_s64(svbool_t pg, int8_t *base, svuint64_t o
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1b_scatter_u32offset_u32u10__SVBool_tPhu12__SVUint32_tu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u32offset_u32(svbool_t pg, uint8_t *base, svuint32_t offsets, svuint32_t data) {
@@ -181,14 +181,14 @@ void test_svstnt1b_scatter_u32offset_u32(svbool_t pg, uint8_t *base, svuint32_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1b_scatter_u64offset_u64u10__SVBool_tPhu12__SVUint64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i8(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u64offset_u64(svbool_t pg, uint8_t *base, svuint64_t offsets, svuint64_t data) {
@@ -199,14 +199,14 @@ void test_svstnt1b_scatter_u64offset_u64(svbool_t pg, uint8_t *base, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1b_scatter_u32base_offset_s32u10__SVBool_tu12__SVUint32_tlu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) {
@@ -217,14 +217,14 @@ void test_svstnt1b_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1b_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
@@ -235,14 +235,14 @@ void test_svstnt1b_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1b_scatter_u32base_offset_u32u10__SVBool_tu12__SVUint32_tlu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) {
@@ -253,14 +253,14 @@ void test_svstnt1b_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1b_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i8> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1b_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c
index 4d8e9b0b7817d..825b699bd722f 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1h.c
@@ -91,14 +91,14 @@ void test_svstnt1h_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1h_scatter_s64offset_s64u10__SVBool_tPsu11__SVInt64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_s64offset_s64(svbool_t pg, int16_t *base, svint64_t offsets, svint64_t data) {
@@ -109,14 +109,14 @@ void test_svstnt1h_scatter_s64offset_s64(svbool_t pg, int16_t *base, svint64_t o
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1h_scatter_s64offset_u64u10__SVBool_tPtu11__SVInt64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_s64offset_u64(svbool_t pg, uint16_t *base, svint64_t offsets, svuint64_t data) {
@@ -127,14 +127,14 @@ void test_svstnt1h_scatter_s64offset_u64(svbool_t pg, uint16_t *base, svint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1h_scatter_u32offset_s32u10__SVBool_tPsu12__SVUint32_tu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u32offset_s32(svbool_t pg, int16_t *base, svuint32_t offsets, svint32_t data) {
@@ -145,14 +145,14 @@ void test_svstnt1h_scatter_u32offset_s32(svbool_t pg, int16_t *base, svuint32_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1h_scatter_u64offset_s64u10__SVBool_tPsu12__SVUint64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u64offset_s64(svbool_t pg, int16_t *base, svuint64_t offsets, svint64_t data) {
@@ -163,14 +163,14 @@ void test_svstnt1h_scatter_u64offset_s64(svbool_t pg, int16_t *base, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1h_scatter_u32offset_u32u10__SVBool_tPtu12__SVUint32_tu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u32offset_u32(svbool_t pg, uint16_t *base, svuint32_t offsets, svuint32_t data) {
@@ -181,14 +181,14 @@ void test_svstnt1h_scatter_u32offset_u32(svbool_t pg, uint16_t *base, svuint32_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1h_scatter_u64offset_u64u10__SVBool_tPtu12__SVUint64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u64offset_u64(svbool_t pg, uint16_t *base, svuint64_t offsets, svuint64_t data) {
@@ -199,14 +199,14 @@ void test_svstnt1h_scatter_u64offset_u64(svbool_t pg, uint16_t *base, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1h_scatter_u32base_offset_s32u10__SVBool_tu12__SVUint32_tlu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) {
@@ -217,14 +217,14 @@ void test_svstnt1h_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1h_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
@@ -235,14 +235,14 @@ void test_svstnt1h_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1h_scatter_u32base_offset_u32u10__SVBool_tu12__SVUint32_tlu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) {
@@ -253,14 +253,14 @@ void test_svstnt1h_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1h_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {
@@ -271,14 +271,14 @@ void test_svstnt1h_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1h_scatter_s64index_s64u10__SVBool_tPsu11__SVInt64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_s64index_s64(svbool_t pg, int16_t *base, svint64_t indices, svint64_t data) {
@@ -289,14 +289,14 @@ void test_svstnt1h_scatter_s64index_s64(svbool_t pg, int16_t *base, svint64_t in
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1h_scatter_s64index_u64u10__SVBool_tPtu11__SVInt64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_s64index_u64(svbool_t pg, uint16_t *base, svint64_t indices, svuint64_t data) {
@@ -307,14 +307,14 @@ void test_svstnt1h_scatter_s64index_u64(svbool_t pg, uint16_t *base, svint64_t i
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1h_scatter_u64index_s64u10__SVBool_tPsu12__SVUint64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u64index_s64(svbool_t pg, int16_t *base, svuint64_t indices, svint64_t data) {
@@ -325,14 +325,14 @@ void test_svstnt1h_scatter_u64index_s64(svbool_t pg, int16_t *base, svuint64_t i
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1h_scatter_u64index_u64u10__SVBool_tPtu12__SVUint64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i16(<vscale x 2 x i16> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1h_scatter_u64index_u64(svbool_t pg, uint16_t *base, svuint64_t indices, svuint64_t data) {

diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c
index a0ce3a66292f6..48c434f91b164 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_stnt1w.c
@@ -55,14 +55,14 @@ void test_svstnt1w_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1w_scatter_s64offset_s64u10__SVBool_tPiu11__SVInt64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_s64offset_s64(svbool_t pg, int32_t *base, svint64_t offsets, svint64_t data) {
@@ -73,14 +73,14 @@ void test_svstnt1w_scatter_s64offset_s64(svbool_t pg, int32_t *base, svint64_t o
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1w_scatter_s64offset_u64u10__SVBool_tPju11__SVInt64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_s64offset_u64(svbool_t pg, uint32_t *base, svint64_t offsets, svuint64_t data) {
@@ -91,14 +91,14 @@ void test_svstnt1w_scatter_s64offset_u64(svbool_t pg, uint32_t *base, svint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1w_scatter_u64offset_s64u10__SVBool_tPiu12__SVUint64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_u64offset_s64(svbool_t pg, int32_t *base, svuint64_t offsets, svint64_t data) {
@@ -109,14 +109,14 @@ void test_svstnt1w_scatter_u64offset_s64(svbool_t pg, int32_t *base, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z35test_svstnt1w_scatter_u64offset_u64u10__SVBool_tPju12__SVUint64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_u64offset_u64(svbool_t pg, uint32_t *base, svuint64_t offsets, svuint64_t data) {
@@ -127,14 +127,14 @@ void test_svstnt1w_scatter_u64offset_u64(svbool_t pg, uint32_t *base, svuint64_t
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1w_scatter_u64base_offset_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) {
@@ -145,14 +145,14 @@ void test_svstnt1w_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z40test_svstnt1w_scatter_u64base_offset_u64u10__SVBool_tu12__SVUint64_tlu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 noundef [[OFFSET:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BASES:%.*]], i64 [[OFFSET:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) {
@@ -163,14 +163,14 @@ void test_svstnt1w_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1w_scatter_s64index_s64u10__SVBool_tPiu11__SVInt64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_s64index_s64(svbool_t pg, int32_t *base, svint64_t indices, svint64_t data) {
@@ -181,14 +181,14 @@ void test_svstnt1w_scatter_s64index_s64(svbool_t pg, int32_t *base, svint64_t in
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1w_scatter_s64index_u64u10__SVBool_tPju11__SVInt64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_s64index_u64(svbool_t pg, uint32_t *base, svint64_t indices, svuint64_t data) {
@@ -199,14 +199,14 @@ void test_svstnt1w_scatter_s64index_u64(svbool_t pg, uint32_t *base, svint64_t i
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1w_scatter_u64index_s64u10__SVBool_tPiu12__SVUint64_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_u64index_s64(svbool_t pg, int32_t *base, svuint64_t indices, svint64_t data) {
@@ -217,14 +217,14 @@ void test_svstnt1w_scatter_u64index_s64(svbool_t pg, int32_t *base, svuint64_t i
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z34test_svstnt1w_scatter_u64index_u64u10__SVBool_tPju12__SVUint64_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr noundef [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i1> [[TMP1]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svstnt1w_scatter_u64index_u64(svbool_t pg, uint32_t *base, svuint64_t indices, svuint64_t data) {

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilege.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilege.c
index cf1b9d231408e..971c65a28a690 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilege.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilege.c
@@ -17,12 +17,12 @@
 
 // CHECK-LABEL: @test_svwhilege_b8_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilege_b8_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilege_b8_s32(int32_t op1, int32_t op2)
@@ -32,13 +32,13 @@ svbool_t test_svwhilege_b8_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b16_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b16_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -49,13 +49,13 @@ svbool_t test_svwhilege_b16_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b32_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b32_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -66,13 +66,13 @@ svbool_t test_svwhilege_b32_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b64_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b64_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -83,12 +83,12 @@ svbool_t test_svwhilege_b64_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b8_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilege_b8_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilege_b8_u32(uint32_t op1, uint32_t op2)
@@ -98,13 +98,13 @@ svbool_t test_svwhilege_b8_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b16_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b16_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -115,13 +115,13 @@ svbool_t test_svwhilege_b16_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b32_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b32_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -132,13 +132,13 @@ svbool_t test_svwhilege_b32_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b64_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b64_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -149,12 +149,12 @@ svbool_t test_svwhilege_b64_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b8_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilege_b8_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilege_b8_s64(int64_t op1, int64_t op2)
@@ -164,13 +164,13 @@ svbool_t test_svwhilege_b8_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b16_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b16_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -181,13 +181,13 @@ svbool_t test_svwhilege_b16_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b32_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b32_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -198,13 +198,13 @@ svbool_t test_svwhilege_b32_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b64_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b64_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -215,12 +215,12 @@ svbool_t test_svwhilege_b64_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b8_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilege_b8_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2)
@@ -230,13 +230,13 @@ svbool_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b16_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b16_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -247,13 +247,13 @@ svbool_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b32_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b32_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -264,13 +264,13 @@ svbool_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2)
 
 // CHECK-LABEL: @test_svwhilege_b64_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilege_b64_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilegt.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilegt.c
index ecd88b039a64e..7d6caeea18ebc 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilegt.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilegt.c
@@ -17,12 +17,12 @@
 
 // CHECK-LABEL: @test_svwhilegt_b8_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilegt_b8_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilegt_b8_s32(int32_t op1, int32_t op2)
@@ -32,13 +32,13 @@ svbool_t test_svwhilegt_b8_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b16_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b16_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -49,13 +49,13 @@ svbool_t test_svwhilegt_b16_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b32_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b32_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -66,13 +66,13 @@ svbool_t test_svwhilegt_b32_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b64_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b64_s32ii(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -83,12 +83,12 @@ svbool_t test_svwhilegt_b64_s32(int32_t op1, int32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b8_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilegt_b8_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilegt_b8_u32(uint32_t op1, uint32_t op2)
@@ -98,13 +98,13 @@ svbool_t test_svwhilegt_b8_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b16_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b16_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -115,13 +115,13 @@ svbool_t test_svwhilegt_b16_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b32_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b32_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -132,13 +132,13 @@ svbool_t test_svwhilegt_b32_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b64_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b64_u32jj(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i32(i32 noundef [[OP1:%.*]], i32 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -149,12 +149,12 @@ svbool_t test_svwhilegt_b64_u32(uint32_t op1, uint32_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b8_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilegt_b8_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2)
@@ -164,13 +164,13 @@ svbool_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b16_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b16_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -181,13 +181,13 @@ svbool_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b32_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b32_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -198,13 +198,13 @@ svbool_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b64_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b64_s64ll(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -215,12 +215,12 @@ svbool_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b8_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svwhilegt_b8_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2)
@@ -230,13 +230,13 @@ svbool_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b16_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b16_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -247,13 +247,13 @@ svbool_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b32_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b32_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -264,13 +264,13 @@ svbool_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2)
 
 // CHECK-LABEL: @test_svwhilegt_b64_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z22test_svwhilegt_b64_u64mm(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw-bfloat.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw-bfloat.c
index b9ed7cc5dd9dc..78e47e07d2225 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw-bfloat.c
@@ -17,13 +17,13 @@
 
 // CHECK-LABEL: @test_svwhilerw_bf16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svwhilerw_bf16PKu6__bf16S0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw.c
index b53fb67507628..9dcb59905a0be 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw.c
@@ -17,12 +17,12 @@
 
 // CHECK-LABEL: @test_svwhilerw_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svwhilerw_s8PKaS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilerw_s8(const int8_t *op1, const int8_t *op2)
@@ -32,13 +32,13 @@ svbool_t test_svwhilerw_s8(const int8_t *op1, const int8_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_s16PKsS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -49,13 +49,13 @@ svbool_t test_svwhilerw_s16(const int16_t *op1, const int16_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_s32PKiS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -66,13 +66,13 @@ svbool_t test_svwhilerw_s32(const int32_t *op1, const int32_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_s64PKlS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -83,12 +83,12 @@ svbool_t test_svwhilerw_s64(const int64_t *op1, const int64_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svwhilerw_u8PKhS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilerw.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilerw_u8(const uint8_t *op1, const uint8_t *op2)
@@ -98,13 +98,13 @@ svbool_t test_svwhilerw_u8(const uint8_t *op1, const uint8_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_u16PKtS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -115,13 +115,13 @@ svbool_t test_svwhilerw_u16(const uint16_t *op1, const uint16_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_u32PKjS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -132,13 +132,13 @@ svbool_t test_svwhilerw_u32(const uint32_t *op1, const uint32_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_u64PKmS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -149,13 +149,13 @@ svbool_t test_svwhilerw_u64(const uint64_t *op1, const uint64_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_f16PKDhS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -166,13 +166,13 @@ svbool_t test_svwhilerw_f16(const float16_t *op1, const float16_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_f32PKfS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -183,13 +183,13 @@ svbool_t test_svwhilerw_f32(const float32_t *op1, const float32_t *op2)
 
 // CHECK-LABEL: @test_svwhilerw_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilerw_f64PKdS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr-bfloat.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr-bfloat.c
index 4cd0f888baf56..d7969f3fd6d85 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr-bfloat.c
@@ -17,13 +17,13 @@
 
 // CHECK-LABEL: @test_svwhilewr_bf16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svwhilewr_bf16PKu6__bf16S0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr.c
index 8af5121c09eac..3cd13311f696f 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr.c
@@ -17,12 +17,12 @@
 
 // CHECK-LABEL: @test_svwhilewr_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svwhilewr_s8PKaS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilewr_s8(const int8_t *op1, const int8_t *op2)
@@ -32,13 +32,13 @@ svbool_t test_svwhilewr_s8(const int8_t *op1, const int8_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_s16PKsS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -49,13 +49,13 @@ svbool_t test_svwhilewr_s16(const int16_t *op1, const int16_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_s32PKiS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -66,13 +66,13 @@ svbool_t test_svwhilewr_s32(const int32_t *op1, const int32_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_s64PKlS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -83,12 +83,12 @@ svbool_t test_svwhilewr_s64(const int64_t *op1, const int64_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svwhilewr_u8PKhS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilewr.b.nxv16i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svwhilewr_u8(const uint8_t *op1, const uint8_t *op2)
@@ -98,13 +98,13 @@ svbool_t test_svwhilewr_u8(const uint8_t *op1, const uint8_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_u16PKtS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -115,13 +115,13 @@ svbool_t test_svwhilewr_u16(const uint16_t *op1, const uint16_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_u32PKjS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -132,13 +132,13 @@ svbool_t test_svwhilewr_u32(const uint32_t *op1, const uint32_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_u64PKmS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -149,13 +149,13 @@ svbool_t test_svwhilewr_u64(const uint64_t *op1, const uint64_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_f16PKDhS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -166,13 +166,13 @@ svbool_t test_svwhilewr_f16(const float16_t *op1, const float16_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_f32PKfS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nxv4i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -183,13 +183,13 @@ svbool_t test_svwhilewr_f32(const float32_t *op1, const float32_t *op2)
 
 // CHECK-LABEL: @test_svwhilewr_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svwhilewr_f64PKdS0_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr noundef readnone [[OP1:%.*]], ptr noundef readnone [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nxv2i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
index 48db3ed7f3ba8..990169329825f 100644
--- a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
+++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
@@ -16,13 +16,13 @@
 
 // CHECK-LABEL: @test_svdup_neonq_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svdup_neonq_s811__Int8x16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
@@ -32,13 +32,13 @@ svint8_t test_svdup_neonq_s8(int8x16_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s1611__Int16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
@@ -48,13 +48,13 @@ svint16_t test_svdup_neonq_s16(int16x8_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s3211__Int32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
@@ -64,13 +64,13 @@ svint32_t test_svdup_neonq_s32(int32x4_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s6411__Int64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
@@ -80,13 +80,13 @@ svint64_t test_svdup_neonq_s64(int64x2_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svdup_neonq_u812__Uint8x16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
@@ -96,13 +96,13 @@ svuint8_t test_svdup_neonq_u8(uint8x16_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u1612__Uint16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
@@ -112,13 +112,13 @@ svuint16_t test_svdup_neonq_u16(uint16x8_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u3212__Uint32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
@@ -128,13 +128,13 @@ svuint32_t test_svdup_neonq_u32(uint32x4_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u6412__Uint64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
@@ -144,13 +144,13 @@ svuint64_t test_svdup_neonq_u64(uint64x2_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f1613__Float16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
@@ -163,13 +163,13 @@ svfloat16_t test_svdup_neonq_f16(float16x8_t n) {
 // CHECK-NEXT  ret <vscale x 4 x float> %1
 // CHECK-LABEL: @test_svdup_neonq_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f3213__Float32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
@@ -179,13 +179,13 @@ svfloat32_t test_svdup_neonq_f32(float32x4_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f6413__Float64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
@@ -195,13 +195,13 @@ svfloat64_t test_svdup_neonq_f64(float64x2_t n) {
 
 // CHECK-LABEL: @test_svdup_neonq_bf16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> [[N:%.*]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svdup_neonq_bf1614__Bfloat16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
index 093ff82af0888..89460ab6187eb 100644
--- a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
+++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
@@ -21,7 +21,7 @@
 //
 // CPP-CHECK-LABEL: @_Z19test_svget_neonq_s8u10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_svget_neonq_s8(svint8_t n) {
@@ -35,7 +35,7 @@ int8x16_t test_svget_neonq_s8(svint8_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_s16u11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_svget_neonq_s16(svint16_t n) {
@@ -49,7 +49,7 @@ int16x8_t test_svget_neonq_s16(svint16_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_s32u11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_svget_neonq_s32(svint32_t n) {
@@ -63,14 +63,13 @@ int32x4_t test_svget_neonq_s32(svint32_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_s64u11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
 //
 int64x2_t test_svget_neonq_s64(svint64_t n) {
   return SVE_ACLE_FUNC(svget_neonq, _s64, , )(n);
 }
 
-//
 // CHECK-LABEL: @test_svget_neonq_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
@@ -78,7 +77,7 @@ int64x2_t test_svget_neonq_s64(svint64_t n) {
 //
 // CPP-CHECK-LABEL: @_Z19test_svget_neonq_u8u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_svget_neonq_u8(svuint8_t n) {
@@ -92,7 +91,7 @@ uint8x16_t test_svget_neonq_u8(svuint8_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_u16u12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_svget_neonq_u16(svuint16_t n) {
@@ -106,7 +105,7 @@ uint16x8_t test_svget_neonq_u16(svuint16_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_u32u12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_svget_neonq_u32(svuint32_t n) {
@@ -120,7 +119,7 @@ uint32x4_t test_svget_neonq_u32(svuint32_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_u64u12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
 //
 uint64x2_t test_svget_neonq_u64(svuint64_t n) {
@@ -134,7 +133,7 @@ uint64x2_t test_svget_neonq_u64(svuint64_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_f16u13__SVFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <8 x half> @llvm.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x half> @llvm.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_svget_neonq_f16(svfloat16_t n) {
@@ -148,13 +147,14 @@ float16x8_t test_svget_neonq_f16(svfloat16_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_f32u13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_svget_neonq_f32(svfloat32_t n) {
   return SVE_ACLE_FUNC(svget_neonq, _f32, , )(n);
 }
 
+//
 // CHECK-LABEL: @test_svget_neonq_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
@@ -162,7 +162,7 @@ float32x4_t test_svget_neonq_f32(svfloat32_t n) {
 //
 // CPP-CHECK-LABEL: @_Z20test_svget_neonq_f64u13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <2 x double> [[TMP0]]
 //
 float64x2_t test_svget_neonq_f64(svfloat64_t n) {
@@ -176,7 +176,7 @@ float64x2_t test_svget_neonq_f64(svfloat64_t n) {
 //
 // CPP-CHECK-LABEL: @_Z21test_svget_neonq_bf16u14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x bfloat> @llvm.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <8 x bfloat> [[TMP0]]
 //
 bfloat16x8_t test_svget_neonq_bf16(svbfloat16_t n) {

diff  --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
index 406d1a55d3a3e..dff25650a7394 100644
--- a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
+++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
@@ -16,12 +16,12 @@
 
 // CHECK-LABEL: @test_svset_neonq_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svset_neonq_s8u10__SVInt8_t11__Int8x16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svset_neonq_s8(svint8_t s, int8x16_t n) {
@@ -30,12 +30,12 @@ svint8_t test_svset_neonq_s8(svint8_t s, int8x16_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_s16u11__SVInt16_t11__Int16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svint16_t test_svset_neonq_s16(svint16_t s, int16x8_t n) {
@@ -44,12 +44,12 @@ svint16_t test_svset_neonq_s16(svint16_t s, int16x8_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_s32u11__SVInt32_t11__Int32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svint32_t test_svset_neonq_s32(svint32_t s, int32x4_t n) {
@@ -58,12 +58,12 @@ svint32_t test_svset_neonq_s32(svint32_t s, int32x4_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_s64u11__SVInt64_t11__Int64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t test_svset_neonq_s64(svint64_t s, int64x2_t n) {
@@ -72,12 +72,12 @@ svint64_t test_svset_neonq_s64(svint64_t s, int64x2_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svset_neonq_u8u11__SVUint8_t12__Uint8x16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svuint8_t test_svset_neonq_u8(svuint8_t s, uint8x16_t n) {
@@ -86,12 +86,12 @@ svuint8_t test_svset_neonq_u8(svuint8_t s, uint8x16_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_u16u12__SVUint16_t12__Uint16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svuint16_t test_svset_neonq_u16(svuint16_t s, uint16x8_t n) {
@@ -100,12 +100,12 @@ svuint16_t test_svset_neonq_u16(svuint16_t s, uint16x8_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_u32u12__SVUint32_t12__Uint32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svuint32_t test_svset_neonq_u32(svuint32_t s, uint32x4_t n) {
@@ -114,12 +114,12 @@ svuint32_t test_svset_neonq_u32(svuint32_t s, uint32x4_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_u64u12__SVUint64_t12__Uint64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svuint64_t test_svset_neonq_u64(svuint64_t s, uint64x2_t n) {
@@ -128,12 +128,12 @@ svuint64_t test_svset_neonq_u64(svuint64_t s, uint64x2_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_f16u13__SVFloat16_t13__Float16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svset_neonq_f16(svfloat16_t s, float16x8_t n) {
@@ -142,12 +142,12 @@ svfloat16_t test_svset_neonq_f16(svfloat16_t s, float16x8_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_f32u13__SVFloat32_t13__Float32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svset_neonq_f32(svfloat32_t s, float32x4_t n) {
@@ -156,12 +156,12 @@ svfloat32_t test_svset_neonq_f32(svfloat32_t s, float32x4_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svset_neonq_f64u13__SVFloat64_t13__Float64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svset_neonq_f64(svfloat64_t s, float64x2_t n) {
@@ -170,12 +170,12 @@ svfloat64_t test_svset_neonq_f64(svfloat64_t s, float64x2_t n) {
 
 // CHECK-LABEL: @test_svset_neonq_bf16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> noundef [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svset_neonq_bf16u14__SVBFloat16_t14__Bfloat16x8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> noundef [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svset_neonq_bf16(svbfloat16_t s, bfloat16x8_t n) {

diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
index a93ca14a7afb1..640031a7b9805 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
@@ -63,9 +63,9 @@ fixed_bool_t from_svbool_t(svbool_t type) {
 // CHECK-LABEL: @lax_cast(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <16 x i32>, align 64
-// CHECK-NEXT:    [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> noundef [[TYPE_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    store <16 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    store <16 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t lax_cast(fixed_int32_t type) {
@@ -74,7 +74,7 @@ svint64_t lax_cast(fixed_int32_t type) {
 
 // CHECK-LABEL: @to_svint32_t__from_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
@@ -85,7 +85,7 @@ svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) {
 // CHECK-LABEL: @from_svint32_t__to_gnu_int32_t(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
-// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
@@ -94,7 +94,7 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
 
 // CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
@@ -104,8 +104,8 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
 
 // CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> noundef [[TYPE_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 //
 gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) {

diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
index 7df0b4417fe48..c9e0eeedbf26d 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
@@ -54,7 +54,7 @@ vint32m1_t sizeless_caller(vint32m1_t x) {
 
 // CHECK-LABEL: @call_int32_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> noundef [[OP1_COERCE:%.*]], <vscale x 2 x i32> noundef [[OP2_COERCE:%.*]], i64 8)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[OP1_COERCE:%.*]], <vscale x 2 x i32> [[OP2_COERCE:%.*]], i64 8)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 fixed_int32m1_t call_int32_ff(fixed_int32m1_t op1, fixed_int32m1_t op2) {
@@ -63,7 +63,7 @@ fixed_int32m1_t call_int32_ff(fixed_int32m1_t op1, fixed_int32m1_t op2) {
 
 // CHECK-LABEL: @call_float64_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> poison, <vscale x 1 x double> noundef [[OP1_COERCE:%.*]], <vscale x 1 x double> noundef [[OP2_COERCE:%.*]], i64 4)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> poison, <vscale x 1 x double> [[OP1_COERCE:%.*]], <vscale x 1 x double> [[OP2_COERCE:%.*]], i64 4)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2) {
@@ -76,7 +76,7 @@ fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2)
 
 // CHECK-LABEL: @call_int32_fs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> noundef [[OP1_COERCE:%.*]], <vscale x 2 x i32> [[OP2:%.*]], i64 8)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[OP1_COERCE:%.*]], <vscale x 2 x i32> [[OP2:%.*]], i64 8)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 fixed_int32m1_t call_int32_fs(fixed_int32m1_t op1, vint32m1_t op2) {
@@ -85,7 +85,7 @@ fixed_int32m1_t call_int32_fs(fixed_int32m1_t op1, vint32m1_t op2) {
 
 // CHECK-LABEL: @call_float64_fs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> poison, <vscale x 1 x double> noundef [[OP1_COERCE:%.*]], <vscale x 1 x double> [[OP2:%.*]], i64 4)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> poison, <vscale x 1 x double> [[OP1_COERCE:%.*]], <vscale x 1 x double> [[OP2:%.*]], i64 4)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 fixed_float64m1_t call_float64_fs(fixed_float64m1_t op1, vfloat64m1_t op2) {

diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
index 3686560027c95..b324d0a2b3cbd 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
@@ -58,7 +58,7 @@ fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) {
 // CHECK-LABEL: @lax_cast(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i32>, align 32
-// CHECK-NEXT:    [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> noundef [[TYPE_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    store <8 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4:![0-9]+]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 1 x i64>, ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
@@ -99,7 +99,7 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
 
 // CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> noundef [[TYPE_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA4]]
 // CHECK-NEXT:    ret void
 //

diff --git a/clang/test/CodeGen/msp430-builtins.c b/clang/test/CodeGen/msp430-builtins.c
index fc909c822b1a9..7465ec1c1d4e9 100644
--- a/clang/test/CodeGen/msp430-builtins.c
+++ b/clang/test/CodeGen/msp430-builtins.c
@@ -11,7 +11,7 @@ _Static_assert(sizeof(long double) == 8, "Assumption failed");
 
 // CHECK-LABEL: @powif(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.powi.f32.i16(float noundef [[X:%.*]], i16 noundef [[Y:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.powi.f32.i16(float [[X:%.*]], i16 [[Y:%.*]])
 // CHECK-NEXT:    ret float [[TMP0]]
 //
 float powif(float x, int y) {
@@ -20,7 +20,7 @@ float powif(float x, int y) {
 
 // CHECK-LABEL: @powi(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double noundef [[X:%.*]], i16 noundef [[Y:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double [[X:%.*]], i16 [[Y:%.*]])
 // CHECK-NEXT:    ret double [[TMP0]]
 //
 double powi(double x, int y) {
@@ -29,7 +29,7 @@ double powi(double x, int y) {
 
 // CHECK-LABEL: @powil(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double noundef [[X:%.*]], i16 noundef [[Y:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.powi.f64.i16(double [[X:%.*]], i16 [[Y:%.*]])
 // CHECK-NEXT:    ret double [[TMP0]]
 //
 long double powil(long double x, int y) {

diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c
index 7c8dee2abbe0b..7fea8192cda75 100644
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --version 2
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --version 2
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -ffinite-math-only -emit-llvm -o - %s | FileCheck -check-prefixes=CFINITEONLY %s
 // RUN: %clang_cc1 -x cl -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -cl-finite-math-only -emit-llvm -o - %s | FileCheck -check-prefixes=CLFINITEONLY %s
@@ -47,7 +47,7 @@ extern dx5x5_t extern_matrix(dx5x5_t);
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @defined_func_f32
 // CLFINITEONLY-SAME: (float noundef nofpclass(nan inf) [[A:%.*]], float noundef nofpclass(nan inf) [[B:%.*]], float noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf float @llvm.fma.f32(float noundef [[A]], float noundef [[B]], float noundef [[C]])
+// CLFINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf float @llvm.fma.f32(float [[A]], float [[B]], float [[C]])
 // CLFINITEONLY-NEXT:    [[ADD:%.*]] = fadd nnan ninf float [[TMP0]], 4.000000e+00
 // CLFINITEONLY-NEXT:    ret float [[ADD]]
 //
@@ -110,7 +110,7 @@ float defined_func_f32(float a, float b, float c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <2 x double> @defined_func_v2f64
 // CLFINITEONLY-SAME: (<2 x double> noundef nofpclass(nan inf) [[A:%.*]], <2 x double> noundef nofpclass(nan inf) [[B:%.*]], <2 x double> noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> noundef [[A]], <2 x double> noundef [[B]], <2 x double> noundef [[C]])
+// CLFINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[A]], <2 x double> [[B]], <2 x double> [[C]])
 // CLFINITEONLY-NEXT:    [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP0]], <double 4.000000e+00, double 4.000000e+00>
 // CLFINITEONLY-NEXT:    ret <2 x double> [[ADD]]
 //

diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 8b3bb69bd6cc6..a755499c6c797 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1519,7 +1519,7 @@ bool test_v128_any_true(v128_t a) {
 
 // CHECK-LABEL: @test_v128_bitselect(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[MASK:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
@@ -1529,7 +1529,7 @@ v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
 // CHECK-LABEL: @test_i8x16_abs(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false)
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[ABS_I]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@@ -1572,7 +1572,7 @@ uint32_t test_i8x16_bitmask(v128_t a) {
 // CHECK-LABEL: @test_i8x16_popcnt(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]]), !range [[RNG5:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]])
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
@@ -1701,7 +1701,7 @@ v128_t test_u8x16_sub_sat(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1713,7 +1713,7 @@ v128_t test_i8x16_min(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1725,7 +1725,7 @@ v128_t test_u8x16_min(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1737,7 +1737,7 @@ v128_t test_i8x16_max(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1760,7 +1760,7 @@ v128_t test_u8x16_avgr(v128_t a, v128_t b) {
 // CHECK-LABEL: @test_i16x8_abs(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR6]]
+// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false)
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[ABS_I]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@@ -1933,7 +1933,7 @@ v128_t test_i16x8_mul(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1945,7 +1945,7 @@ v128_t test_i16x8_min(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1957,7 +1957,7 @@ v128_t test_u16x8_min(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1969,7 +1969,7 @@ v128_t test_i16x8_max(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -1991,7 +1991,7 @@ v128_t test_u16x8_avgr(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_i32x4_abs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> noundef [[A:%.*]], i1 false) #[[ATTR6]]
+// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false)
 // CHECK-NEXT:    ret <4 x i32> [[ABS_I]]
 //
 v128_t test_i32x4_abs(v128_t a) {
@@ -2009,7 +2009,7 @@ v128_t test_i32x4_neg(v128_t a) {
 
 // CHECK-LABEL: @test_i32x4_all_true(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> noundef [[A:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]])
 // CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
 // CHECK-NEXT:    ret i1 [[TOBOOL_I]]
 //
@@ -2019,7 +2019,7 @@ bool test_i32x4_all_true(v128_t a) {
 
 // CHECK-LABEL: @test_i32x4_bitmask(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> noundef [[A:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]])
 // CHECK-NEXT:    ret i32 [[TMP0]]
 //
 uint32_t test_i32x4_bitmask(v128_t a) {
@@ -2028,8 +2028,8 @@ uint32_t test_i32x4_bitmask(v128_t a) {
 
 // CHECK-LABEL: @test_i32x4_shl(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[B:%.*]], 31
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
+// CHECK-NEXT:    [[REM_I:%.*]] = and i32 [[B:%.*]], 31
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
 // CHECK-NEXT:    ret <4 x i32> [[SHL_I]]
@@ -2040,8 +2040,8 @@ v128_t test_i32x4_shl(v128_t a, uint32_t b) {
 
 // CHECK-LABEL: @test_i32x4_shr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[B:%.*]], 31
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
+// CHECK-NEXT:    [[REM_I:%.*]] = and i32 [[B:%.*]], 31
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
 // CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
@@ -2052,8 +2052,8 @@ v128_t test_i32x4_shr(v128_t a, uint32_t b) {
 
 // CHECK-LABEL: @test_u32x4_shr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[B:%.*]], 31
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
+// CHECK-NEXT:    [[REM_I:%.*]] = and i32 [[B:%.*]], 31
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
 // CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
@@ -2091,7 +2091,7 @@ v128_t test_i32x4_mul(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_i32x4_min(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 v128_t test_i32x4_min(v128_t a, v128_t b) {
@@ -2100,7 +2100,7 @@ v128_t test_i32x4_min(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_u32x4_min(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 v128_t test_u32x4_min(v128_t a, v128_t b) {
@@ -2109,7 +2109,7 @@ v128_t test_u32x4_min(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_i32x4_max(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 v128_t test_i32x4_max(v128_t a, v128_t b) {
@@ -2118,7 +2118,7 @@ v128_t test_i32x4_max(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_u32x4_max(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 v128_t test_u32x4_max(v128_t a, v128_t b) {
@@ -2139,7 +2139,7 @@ v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
 // CHECK-LABEL: @test_i64x2_abs(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR6]]
+// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false)
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[ABS_I]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@@ -2183,8 +2183,8 @@ uint32_t test_i64x2_bitmask(v128_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
 // CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 63
-// CHECK-NEXT:    [[AND_I:%.*]] = zext i32 [[TMP1]] to i64
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
+// CHECK-NEXT:    [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
@@ -2198,8 +2198,8 @@ v128_t test_i64x2_shl(v128_t a, uint32_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
 // CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 63
-// CHECK-NEXT:    [[AND_I:%.*]] = zext i32 [[TMP1]] to i64
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
+// CHECK-NEXT:    [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
@@ -2213,8 +2213,8 @@ v128_t test_i64x2_shr(v128_t a, uint32_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
 // CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 63
-// CHECK-NEXT:    [[AND_I:%.*]] = zext i32 [[TMP1]] to i64
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
+// CHECK-NEXT:    [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
@@ -2800,7 +2800,7 @@ v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_i16x8_narrow_i32x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@@ -2810,7 +2810,7 @@ v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_u16x8_narrow_i32x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //

diff --git a/llvm/include/llvm/Transforms/Utils/InferCallsiteAttrs.h b/llvm/include/llvm/Transforms/Utils/InferCallsiteAttrs.h
deleted file mode 100644
index 8c1ef3e0bcbe9..0000000000000
--- a/llvm/include/llvm/Transforms/Utils/InferCallsiteAttrs.h
+++ /dev/null
@@ -1,159 +0,0 @@
-//===- InferCallsiteAttrs.h - Propagate attributes to callsites -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the InferCallsiteAttrs class.
-// This class is used to propagate attributes present in the caller function of
-// the callsite to the arguments/return/callsite itself.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_INFERCALLSITEATTRS_H
-#define LLVM_TRANSFORMS_UTILS_INFERCALLSITEATTRS_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-
-namespace llvm {
-class InferCallsiteAttrs {
-  enum : uint8_t { kMaybe = 0, kYes = 1, kNo = 2 };
-
-  // Limit the maximum number of instructions we will check. Everything is O(1)
-  // so a relatively high value is okay.
-  static constexpr unsigned kMaxChecks = UINT_MAX;
-
-  struct FunctionInfos {
-    uint8_t LandingOrEHPad : 2;
-  };
-
-  struct BasicBlockInfos {
-    uint8_t Alloca : 2;
-    uint8_t UnkMalloc : 2;
-
-    bool isSet() const { return Alloca != kMaybe && UnkMalloc != kMaybe; }
-  };
-
-  struct CallsiteInfos {
-    uint16_t StoresBetweenReturn : 2;
-    uint16_t LoadsBetweenReturn : 2;
-    uint16_t NonDirectTransferBetweenReturn : 2;
-    uint16_t CallerReturnBasedOnCallsite : 2;
-    uint16_t IsLastInsBeforeReturn : 2;
-    uint16_t PrecedingAlloca : 2;
-    uint16_t PrecedingLocalMalloc : 2;
-  };
-
-  DenseMap<const BasicBlock *, BasicBlockInfos> BBInfos;
-  DenseMap<const Function *, FunctionInfos> FunctionInfos;
-  const Function *Caller;
-  const CallBase *CxtCB;
-
-  CallsiteInfos CurCBInfo;
-  struct FunctionInfos CurFnInfo;
-  bool PreserveCache;
-
-  // Wrapper for attribute checks that check both the context callsite and
-  // actual calling function.
-  bool checkCallerHasFnAttr(Attribute::AttrKind Attr) const {
-    return (CxtCB && CxtCB->hasFnAttr(Attr)) || Caller->hasFnAttribute(Attr);
-  };
-  bool checkCallerHasParamAttr(unsigned ArgIdx,
-                               Attribute::AttrKind Attr) const {
-    return (CxtCB && CxtCB->paramHasAttr(ArgIdx, Attr)) ||
-           Caller->getArg(ArgIdx)->hasAttribute(Attr);
-  };
-  bool checkCallerHasReturnAttr(Attribute::AttrKind Attr) const {
-    return (CxtCB && CxtCB->hasRetAttr(Attr)) || Caller->hasRetAttribute(Attr);
-  };
-
-  bool checkCallerDoesNotThrow() const {
-    return (CxtCB && CxtCB->doesNotThrow()) || Caller->doesNotThrow();
-  }
-  bool checkCallerDoesNotAccessMemory() const {
-    return (CxtCB && CxtCB->doesNotAccessMemory()) ||
-           Caller->doesNotAccessMemory();
-  };
-  bool checkCallerOnlyReadsMemory() const {
-    return (CxtCB && CxtCB->onlyReadsMemory()) || Caller->onlyReadsMemory();
-  };
-  bool checkCallerOnlyWritesMemory() const {
-    return (CxtCB && CxtCB->onlyWritesMemory()) || Caller->onlyWritesMemory();
-  };
-  bool checkCallerOnlyAccessesArgMemory() const {
-    return (CxtCB && CxtCB->onlyAccessesArgMemory()) ||
-           Caller->onlyAccessesArgMemory();
-  };
-  bool checkCallerOnlyAccessesInaccessibleMemory() const {
-    return (CxtCB && CxtCB->onlyAccessesInaccessibleMemory()) ||
-           Caller->onlyAccessesInaccessibleMemory();
-  };
-  bool checkCallerOnlyAccessesInaccessibleMemOrArgMem() const {
-    return (CxtCB && CxtCB->onlyAccessesInaccessibleMemOrArgMem()) ||
-           Caller->onlyAccessesInaccessibleMemOrArgMem();
-  };
-
-  // Check all instructions between callbase and end of basicblock (if that
-  // basic block ends in a return). This will cache the analysis information.
-  // Will break early if condition is met based on arguments.
-  bool checkBetweenCallsiteAndReturn(const CallBase *CB, bool BailOnStore,
-                                     bool BailOnLoad,
-                                     bool BailOnNonDirectTransfer,
-                                     bool BailOnNotReturned);
-
-  // Check all instructions in preceding basic blocks (any instruction
-  // that may reach the callsite CB). If conditions are met, can return early
-  // based on the BailOn* arguments.
-  bool checkPrecedingBBIns(const CallBase *CB, bool BailOnAlloca,
-                           bool BailOnLocalMalloc);
-
-  // Check all basic blocks for conditions. At the moment the only condition is
-  // a landing/EH pad, so we store the result and break immediately if one is
-  // found. In the future this may be extended to check other conditions.
-  bool checkAllBBs(bool BailOnPad);
-
-  // Try to propagate nocapture attribute from caller argument to callsite
-  // arguments.
-  bool tryPropagateNoCapture(CallBase *CB);
-
-  // Try trivial propagations (ones that, if true for the caller, are
-  // automatically true for the callsite without further analysis).
-  bool tryTrivialPropagations(CallBase *CB);
-
-  // Try propagations of return attributes (nonnull, noundef, etc...)
-  bool tryReturnPropagations(CallBase *CB);
-
-  // Try propagations of memory access attributes (readnone, readonly, etc...).
-  bool tryMemoryPropagations(CallBase *CB);
-
-  // Add attributes to the callsite; assumes Caller and CxtCB are already set up.
-  bool inferCallsiteAttributesImpl(CallBase *CB);
-
-public:
-  // Set PreserveCacheBetweenFunctions to keep cached information on
-  // functions/basicblocks between calls to processFunction.
-  InferCallsiteAttrs(bool PreserveCacheBetweenFunctions = false)
-      : PreserveCache(PreserveCacheBetweenFunctions) {}
-
-  // Call if either 1) BB instructions have changed which may invalidate some of
-  // the prior analysis or 2) all previous work no longer applies (in which case
-  // clearing the cache improves performance).
-  void resetCache();
-
-  // Add attributes to callsites based on the function they are called in (or,
-  // by setting CxtCallsite, on the exact callsite at which that function is
-  // itself called).
-  bool inferCallsiteAttributes(CallBase *CB,
-                               const CallBase *CxtCallsite = nullptr);
-
-  // Process all callsites in Function ParentFunc. This is more efficient than
-  // calling inferCallsiteAttributes in a loop as it 1) avoids some unnecessary
-  // cache lookups and 2) does some analysis while searching for callsites.
-  bool processFunction(Function *ParentFunc,
-                       const CallBase *ParentCallsite = nullptr);
-};
-} // namespace llvm
-#endif

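The class deleted above is driven entirely through its public methods: construct an InferCallsiteAttrs, then call processFunction on each function whose callsites should be annotated (or inferCallsiteAttributes on an individual callsite). A minimal sketch under only those declared methods; the wrapper function, its name, and the isDeclaration guard are illustrative and not code from this tree:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Transforms/Utils/InferCallsiteAttrs.h"

  using namespace llvm;

  // Hypothetical driver: add caller-derived attributes to every callsite in
  // the given functions, using only the public API declared above.
  static bool propagateCallerAttrsToCallsites(ArrayRef<Function *> Funcs) {
    bool Changed = false;
    // Default-constructed: cached analysis is not preserved across calls to
    // processFunction.
    InferCallsiteAttrs ICA;
    for (Function *F : Funcs)
      if (F && !F->isDeclaration())
        Changed |= ICA.processFunction(F);
    return Changed;
  }
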
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 86dde3a03f523..14a2b00b1bbea 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -57,7 +57,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Utils/InferCallsiteAttrs.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <iterator>
@@ -69,7 +68,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "function-attrs"
 
-
 STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
 STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
 STATISTIC(NumReturned, "Number of arguments marked returned");
@@ -640,7 +638,7 @@ determinePointerAccessAttrs(Argument *A,
             if (Visited.insert(&UU).second)
               Worklist.push_back(&UU);
       }
-
+      
       if (CB.doesNotAccessMemory())
         continue;
 
@@ -1747,7 +1745,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
     addNoRecurseAttrs(Nodes.SCCNodes, Changed);
   }
 
-  // Infer the maximal set of attributes from the ones we've inferred
+  // Finally, infer the maximal set of attributes from the ones we've inferred
   // above.  This is handling the cases where one attribute on a signature
   // implies another, but for implementation reasons the inference rule for
   // the later is missing (or simply less sophisticated).
@@ -1756,14 +1754,6 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
       if (inferAttributesFromOthers(*F))
         Changed.insert(F);
 
-  // Finally, propagate the function's attributes to all the callsites inside of
-  // it.
-  InferCallsiteAttrs ICA;
-  for (Function *F : Nodes.SCCNodes)
-    if (F)
-      if (ICA.processFunction(F))
-        Changed.insert(F);
-
   return Changed;
 }
 

diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index b38f8c7dd83ab..a870071f3f641 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -30,7 +30,6 @@ add_llvm_component_library(LLVMTransformUtils
   GlobalStatus.cpp
   GuardUtils.cpp
   HelloWorld.cpp
-  InferCallsiteAttrs.cpp
   InlineFunction.cpp
   InjectTLIMappings.cpp
   InstructionNamer.cpp

diff --git a/llvm/lib/Transforms/Utils/InferCallsiteAttrs.cpp b/llvm/lib/Transforms/Utils/InferCallsiteAttrs.cpp
deleted file mode 100644
index 4f14fee4fae43..0000000000000
--- a/llvm/lib/Transforms/Utils/InferCallsiteAttrs.cpp
+++ /dev/null
@@ -1,725 +0,0 @@
-//===- InferCallsiteAttrs.cpp - Propagate attributes to callsites ---------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the InferCallsiteAttrs class.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "llvm/Transforms/Utils/InferCallsiteAttrs.h"
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "infer-callsite-attrs"
-
-// Helper to add parameter attributes to callsite CB.
-static bool addCallsiteParamAttributes(CallBase *CB,
-                                       const SmallVector<unsigned> &ArgNos,
-                                       Attribute::AttrKind Attr) {
-  if (ArgNos.empty())
-    return false;
-
-  AttributeList AS = CB->getAttributes();
-  LLVMContext &Ctx = CB->getContext();
-  AS = AS.addParamAttribute(Ctx, ArgNos, Attribute::get(Ctx, Attr));
-
-  CB->setAttributes(AS);
-  return true;
-}
-
-// Helper to determine if a callsite is malloc like and doesn't provably
-// escape.
-static bool isCallUnkMallocLike(const Value *V) {
-  auto *MCB = dyn_cast<CallBase>(V);
-  if (MCB == nullptr)
-    return false;
-
-  return MCB->returnDoesNotAlias();
-}
-
-static bool isCallLocalMallocLike(const Value *V, const CallBase *CB) {
-  if (!isCallUnkMallocLike(V))
-    return false;
-  auto *RI = dyn_cast<ReturnInst>(CB->getParent()->getTerminator());
-  return RI == nullptr || RI->getReturnValue() != V;
-}
-
-// Check all instructions between callbase and end of basicblock (if that
-// basic block ends in a return). This will cache the analysis information.
-// Will break early if condition is met based on arguments.
-bool InferCallsiteAttrs::checkBetweenCallsiteAndReturn(
-    const CallBase *CB, bool BailOnStore, bool BailOnLoad,
-    bool BailOnNonDirectTransfer, bool BailOnNotReturned) {
-  const BasicBlock *BB = CB->getParent();
-  auto *RI = dyn_cast<ReturnInst>(BB->getTerminator());
-
-  if (RI == nullptr)
-    return false;
-
-  if (RI->getReturnValue() == CB)
-    CurCBInfo.CallerReturnBasedOnCallsite = kYes;
-  else {
-    CurCBInfo.CallerReturnBasedOnCallsite = kNo;
-    if (BailOnNotReturned)
-      return false;
-  }
-
-  if (RI == CB->getNextNode()) {
-    CurCBInfo.IsLastInsBeforeReturn = kYes;
-    CurCBInfo.StoresBetweenReturn = kNo;
-    CurCBInfo.LoadsBetweenReturn = kNo;
-    CurCBInfo.NonDirectTransferBetweenReturn = kNo;
-    return true;
-  }
-  CurCBInfo.IsLastInsBeforeReturn = kNo;
-
-  if (BailOnStore && CurCBInfo.StoresBetweenReturn == kYes)
-    return false;
-  if (BailOnLoad && CurCBInfo.LoadsBetweenReturn == kYes)
-    return false;
-  if (BailOnNonDirectTransfer &&
-      CurCBInfo.NonDirectTransferBetweenReturn == kYes)
-    return false;
-
-  if (CurCBInfo.StoresBetweenReturn != kMaybe &&
-      CurCBInfo.LoadsBetweenReturn != kMaybe &&
-      CurCBInfo.NonDirectTransferBetweenReturn != kMaybe)
-    return true;
-
-  unsigned Cnt = 0;
-  for (const Instruction *Ins = CB->getNextNode(); Ins && Ins != RI;
-       Ins = Ins->getNextNode()) {
-
-    if (Cnt++ >= kMaxChecks)
-      return false;
-
-    if (Ins->mayWriteToMemory()) {
-      CurCBInfo.StoresBetweenReturn = kYes;
-      if (BailOnStore)
-        return false;
-    }
-
-    if (Ins->mayReadFromMemory()) {
-      CurCBInfo.LoadsBetweenReturn = kYes;
-      if (BailOnLoad)
-        return false;
-    }
-
-    if (!isGuaranteedToTransferExecutionToSuccessor(Ins)) {
-      CurCBInfo.NonDirectTransferBetweenReturn = kYes;
-      if (BailOnNonDirectTransfer)
-        return false;
-    }
-  }
-
-  if (CurCBInfo.StoresBetweenReturn == kMaybe)
-    CurCBInfo.StoresBetweenReturn = kNo;
-  if (CurCBInfo.LoadsBetweenReturn == kMaybe)
-    CurCBInfo.LoadsBetweenReturn = kNo;
-  if (CurCBInfo.NonDirectTransferBetweenReturn == kMaybe)
-    CurCBInfo.NonDirectTransferBetweenReturn = kNo;
-
-  return true;
-}
-
-// Check all instructions in preceding basic blocks (any instruction
-// that may reach the callsite CB). If conditions are met, can return early
-// based on the BailOn* arguments.
-bool InferCallsiteAttrs::checkPrecedingBBIns(const CallBase *CB,
-                                             bool BailOnAlloca,
-                                             bool BailOnLocalMalloc) {
-
-  if (BailOnAlloca && CurCBInfo.PrecedingAlloca == kYes)
-    return false;
-  if (BailOnLocalMalloc && CurCBInfo.PrecedingLocalMalloc == kYes)
-    return false;
-
-  if (CurCBInfo.PrecedingAlloca != kMaybe &&
-      CurCBInfo.PrecedingLocalMalloc != kMaybe)
-    return true;
-
-  SmallPtrSet<const BasicBlock *, 16> AllPreds;
-  SmallVector<const BasicBlock *> Preds;
-  unsigned Cnt = 0;
-  AllPreds.insert(CB->getParent());
-  Preds.push_back(CB->getParent());
-
-  auto WorklistCont = [&](const BasicBlock *CurBB) {
-    for (const BasicBlock *Pred : predecessors(CurBB))
-      if (AllPreds.insert(Pred).second)
-        Preds.push_back(Pred);
-  };
-
-  while (!Preds.empty()) {
-    const BasicBlock *CurBB = Preds.pop_back_val();
-    BasicBlockInfos &BBInfo = BBInfos[CurBB];
-
-    if (BBInfo.Alloca == kNo && BBInfo.UnkMalloc == kNo) {
-      WorklistCont(CurBB);
-      continue;
-    }
-
-    auto ProcessHasAlloca = [this, BBInfo, BailOnAlloca]() {
-      if (BBInfo.Alloca == kYes) {
-        CurCBInfo.PrecedingAlloca = kYes;
-        if (BailOnAlloca)
-          return false;
-      }
-      return true;
-    };
-
-    auto ProcessHasUnkMalloc = [this, BBInfo, BailOnLocalMalloc,
-                                CB](const BasicBlock *BB,
-                                    const Value *V = nullptr) {
-      // We check beyond just if there is a malloc. We only set local malloc if
-      // that malloc is not guaranteed to be made visible outside of the caller.
-      // We don't exhaustively check (see if the malloc was stored to a ptr or
-      // global variable), just check if it's returned from our callsite's basic
-      // block.
-      // TODO: We could also check if the malloc escapes in other ways than
-      // return (like stored to a pointer or global), especially if we are
-      // already iterating through all the instructions.
-      if (BBInfo.UnkMalloc == kYes) {
-        if (V == nullptr) {
-          // We only know there is a malloc instruction, not where, so iterate
-          // and find.
-          for (const Value &Val : *BB) {
-            if (isCallLocalMallocLike(&Val, CB)) {
-              CurCBInfo.PrecedingLocalMalloc = kYes;
-              break;
-            }
-          }
-        } else if (isCallLocalMallocLike(V, CB)) {
-          CurCBInfo.PrecedingLocalMalloc = kYes;
-        }
-
-        if (BailOnLocalMalloc && CurCBInfo.PrecedingLocalMalloc == kYes)
-          return false;
-      }
-      return true;
-    };
-
-    if (!ProcessHasAlloca() || !ProcessHasUnkMalloc(CurBB))
-      return false;
-
-    bool EarlyOut = false;
-    // Check all instructions in current BB for an alloca/leaked malloc.
-    for (const Value &V : *CurBB) {
-      if (&V == CB) {
-        EarlyOut = CurCBInfo.IsLastInsBeforeReturn != kYes;
-        break;
-      }
-
-      // If we reach max checks and can't rule out the alloca/leaked malloc
-      // case, fail.
-      if (Cnt++ >= kMaxChecks)
-        return false;
-
-      if (isa<AllocaInst>(&V))
-        BBInfo.Alloca = kYes;
-
-      if (isCallUnkMallocLike(&V))
-        BBInfo.UnkMalloc = kYes;
-
-      if (!ProcessHasAlloca() || !ProcessHasUnkMalloc(CurBB, &V))
-        return false;
-    }
-
-    if (!EarlyOut) {
-      if (BBInfo.Alloca == kMaybe)
-        BBInfo.Alloca = kNo;
-
-      if (BBInfo.UnkMalloc == kMaybe)
-        BBInfo.UnkMalloc = kNo;
-    }
-
-    WorklistCont(CurBB);
-  }
-
-  if (CurCBInfo.PrecedingAlloca == kMaybe)
-    CurCBInfo.PrecedingAlloca = kNo;
-  if (CurCBInfo.PrecedingLocalMalloc == kMaybe)
-    CurCBInfo.PrecedingLocalMalloc = kNo;
-  return true;
-}
-
-// Check all basic blocks for conditions. At the moment the only condition is
-// a landing/EH pad, so we store the result and break immediately if one is
-// found. In the future this may be extended to check other conditions.
-bool InferCallsiteAttrs::checkAllBBs(bool BailOnPad) {
-  if (BailOnPad && CurFnInfo.LandingOrEHPad == kYes)
-    return false;
-
-  if (CurFnInfo.LandingOrEHPad != kMaybe)
-    return false;
-
-  for (const BasicBlock &CurBB : *Caller) {
-    if (CurBB.isEHPad() || CurBB.isLandingPad()) {
-      CurFnInfo.LandingOrEHPad = kYes;
-      if (BailOnPad)
-        return false;
-      // Nothing else to set/check
-      break;
-    }
-  }
-
-  if (CurFnInfo.LandingOrEHPad == kMaybe)
-    CurFnInfo.LandingOrEHPad = kNo;
-  return true;
-}
-// Try to propagate nocapture attribute from caller argument to callsite
-// arguments.
-bool InferCallsiteAttrs::tryPropagateNoCapture(CallBase *CB) {
-
-  if (!isa<CallInst>(CB))
-    return false;
-
-  SmallVector<unsigned> NoCaptureArgs;
-
-  // If this callsite is to a readonly function that doesn't throw then the
-  // only way for the pointer to be captured is through the return value. If
-  // the return type is void or the return value of this callsite is unused,
-  // then all the pointer parameters at this callsite must be nocapture. NB:
-  // This is a slight strengthening of the case done in the FunctionAttrs pass
-  // which has the same logic but only for void functions. At specific
-  // callsites we can handle non-void functions if the return value is unused.
-  bool IsAlwaysNoCapture = CB->onlyReadsMemory() && CB->doesNotThrow() &&
-                           (CB->getType()->isVoidTy() || CB->use_empty());
-  if (IsAlwaysNoCapture) {
-    unsigned ArgN = 0;
-    for (Value *V : CB->args()) {
-      if (V->getType()->isPointerTy() &&
-          !CB->paramHasAttr(ArgN, Attribute::NoCapture))
-        NoCaptureArgs.push_back(ArgN);
-      ++ArgN;
-    }
-
-    return addCallsiteParamAttributes(CB, NoCaptureArgs, Attribute::NoCapture);
-  }
-
-  // If this is not trivially nocapture, then we propagate a nocapture
-  // argument if the callsite meets the following requirements:
-  //
-  //    1) The callsite is in a basic block that ends with a return
-  //       statement.
-  //    2) Between the callsite and the end of its basic block there are no
-  //       may-write instructions.
-  //    3) The return value of the callsite is not used (directly or
-  //       indirectly) as the address of a may-read instruction.
-  //    4) There are no allocas or leaked (not freed or returned) mallocs
-  //       reachable from the callsite.
-  //    5) The callsite/caller are nothrow OR there is no landing pad in the
-  //       caller.
-  //
-  // These requirements are intentionally overly conservative. We are only
-  // trying to catch relatively trivial cases.
-  //
-  // Requirements 1 & 2 are there to ensure that after the callsite has
-  // returned, the state of any captured-in-memory pointers cannot change.
-  // This implies that if the caller has any nocapture-in-memory guarantees,
-  // that state has been reached by the end of the callsite.
-  //
-  // Requirements 3 & 4 are to cover cases where pointers could escape the
-  // callsite (but not the caller) through non-dead code. Any return value
-  // that's loaded from (or used to create a pointer that is loaded from) could
-  // have derived from an argument. Finally, allocas/leaked mallocs in general
-  // are difficult (so we avoid them entirely). Callsites can arbitrarily
-  // store pointers in allocas for use later without violating a nocapture
-  // guarantee by the caller, as the allocas are torn down at caller return.
-  // Likewise a leaked malloc would not be accessible outside of the caller,
-  // but could still be accessible after the callsite. There are a variety of
-  // complex cases involving allocas/leaked mallocs. For simplicity, if we see
-  // either we simply fail.
-  //
-  // Requirement 5 is to cover the last way an escape can occur. If the
-  // callsite/caller is nothrow it's a non-issue. If the callsite may throw,
-  // then a method of capture is through an exception. If the caller has no
-  // landing pad to catch this exception, then the exception state will be
-  // visible outside of the caller so any guarantees about nocapture made by
-  // the caller will apply to the callsite's throw. If the caller has a landing
-  // pad, it's possible for the callsite to capture a pointer in a throw that
-  // is later cleared by the caller.
-
-  // Check easy O(1) stuff that can quickly rule out this callsite.
-  const BasicBlock *BB = CB->getParent();
-  // Make sure this BB ends in a return (Requirement 1).
-  auto *RI = dyn_cast<ReturnInst>(BB->getTerminator());
-  if (RI == nullptr)
-    return false;
-
-  // Req 2 fails.
-  if (CurCBInfo.StoresBetweenReturn == kYes)
-    return false;
-
-  // Req 4 fails.
-  if (CurCBInfo.PrecedingAlloca == kYes ||
-      CurCBInfo.PrecedingLocalMalloc == kYes)
-    return false;
-
-  bool MayThrow = !(CB->doesNotThrow() || checkCallerDoesNotThrow());
-  // Req 5 fails.
-  if (MayThrow && CurFnInfo.LandingOrEHPad == kYes)
-    return false;
-
-  SmallPtrSet<Value *, 8> NoCaptureParentArguments;
-
-  // See if caller has any nocapture arguments we may be able to propagate
-  // attributes from.
-  for (unsigned I = 0; I < Caller->arg_size(); ++I)
-    if (checkCallerHasParamAttr(I, Attribute::NoCapture))
-      NoCaptureParentArguments.insert(Caller->getArg(I));
-
-  unsigned ArgN = 0;
-  for (Value *V : CB->args()) {
-    // See if this callsite argument is missing nocapture and its propagatable
-    // (nocapture in the caller).
-    if (!CB->paramHasAttr(ArgN, Attribute::NoCapture) &&
-        NoCaptureParentArguments.contains(V))
-      NoCaptureArgs.push_back(ArgN);
-    ++ArgN;
-  }
-
-  // No point to do more expensive analysis if we won't be able to do anything
-  // with it.
-  if (NoCaptureArgs.empty())
-    return false;
-
-  // Check that between the callsite and return we can't change the capture
-  // state (BailOnStore). NB: we can accept non-direct-transfer instructions as
-  // long as the caller does not have any landing pads. If the caller has no
-  // landing pad, then any exception/interrupt/etc... will leave the caller's
-  // scope and thus any caller nocapture guarantee will apply.
-  if (!checkBetweenCallsiteAndReturn(CB, /*BailOnStore*/ true,
-                                     /*BailOnLoad*/ false,
-                                     /*BailOnNonDirectTransfer*/ false,
-                                     /*BailOnNotReturned*/ false))
-    return false;
-
-  // We need to check if the load is from the return of this callsite. If so,
-  // then a pointer may have been captured via the return (Req 3).
-  if (CurCBInfo.LoadsBetweenReturn == kYes) {
-
-    // If the callsite return is used as the caller's return, then the
-    // caller's no-capture guarantee includes the callsite's return, so we
-    // don't need to check the actual loads (NB: we fail on NonDirectTransfer
-    // between the callsite and return, so the callsite's return MUST reach
-    // the return instruction).
-    if (CurCBInfo.CallerReturnBasedOnCallsite != kYes) {
-
-      // Figure out if the load is derived from the return of the callsite.
-      // If so, we assume it's a captured pointer.
-      SmallPtrSet<const Value *, 16> DerivedFromReturn;
-      for (const Value *U : CB->uses())
-        DerivedFromReturn.insert(U);
-
-      unsigned Cnt = 0;
-      for (const Instruction *Ins = CB; Ins && Ins != RI;
-           Ins = Ins->getNextNode()) {
-        if (Cnt++ >= kMaxChecks)
-          return false;
-
-        for (const Value *U : Ins->operands()) {
-          if (DerivedFromReturn.contains(U)) {
-            DerivedFromReturn.insert(Ins);
-            break;
-          }
-        }
-
-        if (Ins->mayReadFromMemory()) {
-
-          // TODO: We could do a bit more analysis and check if Ins is used to
-          // derive the Caller return value at all, rather than just checking
-          // if they are equal.
-          if ((!isa<LoadInst>(Ins) ||
-               cast<LoadInst>(Ins)->getPointerOperand() != RI) &&
-              DerivedFromReturn.contains(Ins))
-            return false;
-        }
-      }
-    }
-  }
-
-  // Check all predecessors (basic blocks from which an alloca or leaked
-  // malloc may be able to reach this callsite). We are being incredibly
-  // conservative here. We could likely skip the alloca/leaked malloc search
-  // in a few cases: 1) if the callsite is the last instruction before the
-  // return, or if there are no may-read instructions between the callsite
-  // and the return; 2) if there are no stores to the alloca/leaked malloc
-  // that may reach the callsite, it's probably also safe; and/or 3) if the
-  // callsite is readonly, it could never capture in memory, so these
-  // concerns are non-factors. For now stay conservative; over time these
-  // optimizations can be added.
-  if (!checkPrecedingBBIns(CB, /*BailOnAlloca*/ true,
-                           /*BailOnLocalMalloc*/ true))
-    return false;
-
-  if (MayThrow && !checkAllBBs(/*BailOnPad*/ true))
-    return false;
-
-  return addCallsiteParamAttributes(CB, NoCaptureArgs, Attribute::NoCapture);
-}
-
-// Try trivial propagations (ones that, if true for the caller, are
-// automatically true for the callsite without further analysis).
-bool InferCallsiteAttrs::tryTrivialPropagations(CallBase *CB) {
-  bool Changed = false;
-  const std::array CallerFnAttrPropagations = {
-      Attribute::MustProgress, Attribute::WillReturn, Attribute::NoSync};
-  for (const Attribute::AttrKind Attr : CallerFnAttrPropagations) {
-    if (checkCallerHasFnAttr(Attr) && !CB->hasFnAttr(Attr)) {
-      Changed = true;
-      CB->addFnAttr(Attr);
-    }
-  }
-
-  const std::array CallerParamAttrPropagations = {
-      Attribute::NoUndef,  Attribute::NonNull,  Attribute::NoFree,
-      Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly};
-
-  for (const Attribute::AttrKind Attr : CallerParamAttrPropagations) {
-    SmallPtrSet<Value *, 8> CallerArgs;
-    SmallVector<unsigned> ArgNosAttr;
-    for (unsigned I = 0; I < Caller->arg_size(); ++I)
-      if (checkCallerHasParamAttr(I, Attr))
-        CallerArgs.insert(Caller->getArg(I));
-
-    unsigned ArgN = 0;
-    // TODO: For the readnone, readonly, and writeonly attributes, we may be
-    // able to inherit from the callsite param's underlying object if that
-    // underlying object is an argument.
-    for (Value *V : CB->args()) {
-      if (!CB->paramHasAttr(ArgN, Attr) && CallerArgs.contains(V))
-        ArgNosAttr.push_back(ArgN);
-      ArgN++;
-    }
-
-    Changed |= addCallsiteParamAttributes(CB, ArgNosAttr, Attr);
-  }
-
-  return Changed;
-}
-
-// Try propagations of return attributes (nonnull, noundef, etc.).
-bool InferCallsiteAttrs::tryReturnPropagations(CallBase *CB) {
-  std::optional<bool> CallsiteReturnMustBeCallerReturnCached;
-  auto CallsiteReturnMustBeCallerReturn = [&]() {
-    if (CallsiteReturnMustBeCallerReturnCached)
-      return *CallsiteReturnMustBeCallerReturnCached;
-    // We can only propagate return attributes if we are certain this
-    // callsite's return is used as the caller's return (in its basic
-    // block).
-    CallsiteReturnMustBeCallerReturnCached = checkBetweenCallsiteAndReturn(
-        CB, /*BailOnStore*/ false, /*BailOnLoad*/ false,
-        /*BailOnNonDirectTransfer*/ true, /*BailOnNotReturned*/ true);
-    return *CallsiteReturnMustBeCallerReturnCached;
-  };
-
-  bool Changed = false;
-  const std::array CallerReturnAttrPropagations = {Attribute::NoUndef,
-                                                   Attribute::NonNull};
-  for (const Attribute::AttrKind Attr : CallerReturnAttrPropagations) {
-    if (checkCallerHasReturnAttr(Attr) && !CB->hasRetAttr(Attr)) {
-      // Defer the potentially expensive analysis until we know we actually
-      // need it.
-      if (!CallsiteReturnMustBeCallerReturn())
-        return Changed;
-      CB->addRetAttr(Attr);
-      Changed = true;
-    }
-  }
-  return Changed;
-}
-
-// Try propagations of memory access attributes (readnone, readonly, etc.).
-bool InferCallsiteAttrs::tryMemoryPropagations(CallBase *CB) {
-  std::optional<bool> MayHaveLocalMemoryArgsCached;
-  std::optional<bool> MayHavePrecedingLocalMemoryCached;
-
-  auto MayHavePrecedingLocalMemory = [&]() {
-    if (MayHavePrecedingLocalMemoryCached)
-      return *MayHavePrecedingLocalMemoryCached;
-    MayHavePrecedingLocalMemoryCached =
-        checkPrecedingBBIns(CB, /*BailOnAlloca*/ true,
-                            /*BailOnLocalMalloc*/ true);
-    return *MayHavePrecedingLocalMemoryCached;
-  };
-
-  auto MayHaveLocalMemoryArgs = [&]() {
-    if (MayHaveLocalMemoryArgsCached)
-      return *MayHaveLocalMemoryArgsCached;
-
-    // If there are local memory regions that can reach this callsite, check
-    // all arguments. If we can't trace them back to some value that is also
-    // visible outside the caller, fail.
-    for (Value *V : CB->args()) {
-      Value *UnderlyingObj = getUnderlyingObject(V);
-      // TODO: We probably don't need to bail entirely here. We could still
-      // set parameter attributes for the callsite for the arguments that do
-      // meet these conditions.
-      if (!isa<Argument>(UnderlyingObj) && !isa<GlobalValue>(UnderlyingObj)) {
-        // Don't do potentially very expensive preceding BB check unless
-        // cheaper getUnderlyingObject check fails to prove what we need.
-        // TODO: Does local malloc inherit parent memory access constraints
-        // or is it like alloca? If the former set BailOnLocalMalloc to
-        // false.
-        MayHaveLocalMemoryArgsCached = MayHavePrecedingLocalMemory();
-        return *MayHaveLocalMemoryArgsCached;
-      }
-    }
-    MayHaveLocalMemoryArgsCached = true;
-    return true;
-  };
-
-  bool Changed = false;
-  // If the callsite has no local memory visible to it, then it shares
-  // constraints with the caller, as any pointer it has access to is shared
-  // with the caller. For readnone, readonly, and writeonly, simply having
-  // non-alloca args is enough to propagate. For the ArgMemory attributes, we
-  // need absolutely no local memory, as otherwise we could nest caller-local
-  // memory in argument pointers and then use that dereferenced caller-local
-  // memory in the callsite, violating the constraint.
-  if (checkCallerDoesNotAccessMemory() && !CB->doesNotAccessMemory()) {
-    // Defer the potentially expensive analysis until we know we actually
-    // need it.
-    if (!MayHaveLocalMemoryArgs())
-      return Changed;
-    CB->setDoesNotAccessMemory();
-    Changed = true;
-  }
-  if (checkCallerOnlyReadsMemory() && !CB->onlyReadsMemory()) {
-    if (!MayHaveLocalMemoryArgs())
-      return Changed;
-    CB->setOnlyReadsMemory();
-    Changed = true;
-  }
-  if (checkCallerOnlyWritesMemory() && !CB->onlyWritesMemory()) {
-    if (!MayHaveLocalMemoryArgs())
-      return Changed;
-    CB->setOnlyWritesMemory();
-    Changed = true;
-  }
-
-  if (checkCallerOnlyAccessesArgMemory() && !CB->onlyAccessesArgMemory()) {
-    // Switch to heavier check here.
-    // TODO: We may be able to do some analysis of whether any of the allocas
-    // are ever stored anywhere (if that's not the case, then just argument
-    // memory is enough again).
-    if (!MayHavePrecedingLocalMemory())
-      return Changed;
-    CB->setOnlyAccessesArgMemory();
-    Changed = true;
-  }
-  if (checkCallerOnlyAccessesInaccessibleMemory() &&
-      !CB->onlyAccessesInaccessibleMemory()) {
-    if (!MayHavePrecedingLocalMemory())
-      return Changed;
-    CB->setOnlyAccessesInaccessibleMemory();
-    Changed = true;
-  }
-  if (checkCallerOnlyAccessesInaccessibleMemOrArgMem() &&
-      !CB->onlyAccessesInaccessibleMemOrArgMem()) {
-    if (!MayHavePrecedingLocalMemory())
-      return Changed;
-    CB->setOnlyAccessesInaccessibleMemOrArgMem();
-    Changed = true;
-  }
-
-  return Changed;
-}
-
-// Add attributes to callsite, assumes Caller and CxtCB are set up already
-bool InferCallsiteAttrs::inferCallsiteAttributesImpl(CallBase *CB) {
-
-  memset(&CurCBInfo, kMaybe, sizeof(CurCBInfo));
-  bool Changed = tryPropagateNoCapture(CB);
-  Changed |= tryTrivialPropagations(CB);
-  Changed |= tryReturnPropagations(CB);
-  Changed |= tryMemoryPropagations(CB);
-  return Changed;
-}
-
-void InferCallsiteAttrs::resetCache() {
-  BBInfos.clear();
-  FunctionInfos.clear();
-}
-
-// Add attributes to callsites based on the function they are called in (or,
-// by setting CxtCallsite, based on the exact callsite of the caller).
-bool InferCallsiteAttrs::inferCallsiteAttributes(CallBase *CB,
-                                                 const CallBase *CxtCallsite) {
-  const BasicBlock *BB = CB->getParent();
-  assert(BB && "BasicBlock should never be null in this context");
-  const Function *PF = BB->getParent();
-  assert(PF && "Function should never be null in this context");
-
-  // Setup caching state
-
-  CurFnInfo = FunctionInfos[PF];
-  Caller = PF;
-  CxtCB = CxtCallsite;
-
-  bool Changed = inferCallsiteAttributesImpl(CB);
-
-  return Changed;
-}
-
-// Process all callsites in Function ParentFunc. This is more efficient than
-// calling inferCallsiteAttributes in a loop as it 1) avoids some unnecessary
-// cache lookups and 2) does some analysis while searching for callsites.
-bool InferCallsiteAttrs::processFunction(Function *ParentFunc,
-                                         const CallBase *ParentCallsite) {
-  bool Changed = false;
-  if (PreserveCache)
-    CurFnInfo = FunctionInfos[ParentFunc];
-  else
-    memset(&CurFnInfo, kMaybe, sizeof(CurFnInfo));
-  Caller = ParentFunc;
-  CxtCB = ParentCallsite;
-  for (BasicBlock &BB : *ParentFunc) {
-    if (BB.isEHPad() || BB.isLandingPad())
-      CurFnInfo.LandingOrEHPad = kYes;
-    BasicBlockInfos &BBInfo = BBInfos[&BB];
-    for (Value &V : BB) {
-      if (isa<AllocaInst>(&V))
-        BBInfo.Alloca = kYes;
-
-      if (isCallUnkMallocLike(&V))
-        BBInfo.UnkMalloc = kYes;
-
-      if (auto *CB = dyn_cast<CallBase>(&V))
-        Changed |= inferCallsiteAttributesImpl(CB);
-    }
-
-    if (BBInfo.Alloca == kMaybe)
-      BBInfo.Alloca = kNo;
-
-    if (BBInfo.UnkMalloc == kMaybe)
-      BBInfo.UnkMalloc = kNo;
-  }
-  if (CurFnInfo.LandingOrEHPad == kMaybe)
-    CurFnInfo.LandingOrEHPad = kNo;
-
-  if (PreserveCache)
-    FunctionInfos[ParentFunc] = CurFnInfo;
-  else
-    resetCache();
-
-  return Changed;
-}
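
For context on what the revert removes, the core of the "trivial" propagation step above can be sketched in a few lines. This is an illustrative, heavily simplified example; it is not part of the patch, and the helper name propagateTrivialFnAttrs is hypothetical. It only mirrors the caller-to-callsite direction, using the public Function/CallBase APIs, for the three attributes the patch treated as trivially inheritable.

#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Copy mustprogress/willreturn/nosync from a caller onto any callsite inside
// it that is missing them. The reasoning in the removed code: if these hold
// for the caller, they must hold for every call executed within it.
static bool propagateTrivialFnAttrs(Function &Caller) {
  bool Changed = false;
  const Attribute::AttrKind Kinds[] = {Attribute::MustProgress,
                                       Attribute::WillReturn,
                                       Attribute::NoSync};
  for (Instruction &I : instructions(Caller)) {
    auto *CB = dyn_cast<CallBase>(&I);
    if (!CB)
      continue;
    for (Attribute::AttrKind Kind : Kinds) {
      if (Caller.hasFnAttribute(Kind) && !CB->hasFnAttr(Kind)) {
        CB->addFnAttr(Kind);
        Changed = true;
      }
    }
  }
  return Changed;
}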

diff  --git a/llvm/test/Other/cgscc-devirt-iteration.ll b/llvm/test/Other/cgscc-devirt-iteration.ll
index 4fbc6bb3fb955..54229461cb394 100644
--- a/llvm/test/Other/cgscc-devirt-iteration.ll
+++ b/llvm/test/Other/cgscc-devirt-iteration.ll
@@ -67,7 +67,7 @@ entry:
   ; This indirect call is the first to be resolved, allowing us to deduce
   ; readonly but not (yet) readnone.
   call void %f1(ptr %ignore)
-; CHECK: call void @readnone_with_arg(ptr
+; CHECK: call void @readnone_with_arg(ptr %ignore)
 
   ; Bogus call to test2_b to make this a cycle.
   call void @test2_b()

diff  --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll
index 04bf5beb59f9c..17a4155319b60 100644
--- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll
@@ -357,7 +357,7 @@ define void @parent1(ptr %a, ptr %b, ptr %c) {
 define void @parent2(ptr %a, ptr %b, ptr %c) {
 ; FNATTR-LABEL: @parent2(ptr nonnull %a, ptr nonnull %b, ptr nonnull %c)
 ; FNATTR-NEXT:    call void @use3nonnull(ptr %b, ptr %c, ptr %a)
-; FNATTR-NEXT:    call void @use3(ptr nonnull %c, ptr nonnull %a, ptr nonnull %b)
+; FNATTR-NEXT:    call void @use3(ptr %c, ptr %a, ptr %b)
 
 
 ; FNATTR-NEXT:    ret void
@@ -371,7 +371,7 @@ define void @parent2(ptr %a, ptr %b, ptr %c) {
 define void @parent3(ptr %a, ptr %b, ptr %c) {
 ; FNATTR-LABEL: @parent3(ptr nonnull %a, ptr %b, ptr %c)
 ; FNATTR-NEXT:    call void @use1nonnull(ptr %a)
-; FNATTR-NEXT:    call void @use3(ptr %c, ptr %b, ptr nonnull %a)
+; FNATTR-NEXT:    call void @use3(ptr %c, ptr %b, ptr %a)
 
 
 ; FNATTR-NEXT:  ret void
@@ -437,7 +437,7 @@ define i8 @parent6(ptr %a, ptr %b) {
 
 define i8 @parent7(ptr %a) {
 ; FNATTR-LABEL: @parent7(ptr nonnull %a)
-; FNATTR-NEXT:    [[RET:%.*]] = call i8 @use1safecall(ptr nonnull %a)
+; FNATTR-NEXT:    [[RET:%.*]] = call i8 @use1safecall(ptr %a)
 ; FNATTR-NEXT:    call void @use1nonnull(ptr %a)
 
 

diff  --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
index f36113b775189..94ffde15d338a 100644
--- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -45,7 +45,7 @@ define void @test4_2(ptr %p) {
 ; CHECK: Function Attrs: nofree memory(read)
 ; CHECK-LABEL: define {{[^@]+}}@test4_2
 ; CHECK-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR3:[0-9]+]] {
-; CHECK-NEXT:    call void @test4_1(ptr readonly [[P]])
+; CHECK-NEXT:    call void @test4_1(ptr [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @test4_1(ptr %p)
@@ -115,7 +115,7 @@ define void @test8_2(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@test8_2
 ; CHECK-SAME: (ptr writeonly [[P:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = call ptr @test8_1(ptr writeonly [[P]])
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @test8_1(ptr [[P]])
 ; CHECK-NEXT:    store i32 10, ptr [[CALL]], align 4
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll
index e1a2f4162bb74..e10c84bd6ac83 100644
--- a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll
+++ b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll
@@ -8,10 +8,10 @@ declare void @decl_unknown()
 define void @test_fn_mustprogress(ptr %ptr) mustprogress {
 ; CHECK: Function Attrs: mustprogress
 ; CHECK-LABEL: @test_fn_mustprogress(
-; CHECK:     call void @decl_readonly() #
-; CHECK:     call void @decl_readnone() #
-; CHECK:     call void @decl_unknown() #
-; CHECK:     call void @decl_argmemonly(ptr [[PTR:%.*]]) #
+; CHECK-NOT:     call void @decl_readonly() #
+; CHECK-NOT:     call void @decl_readnone() #
+; CHECK-NOT:     call void @decl_unknown() #
+; CHECK-NOT:     call void @decl_argmemonly(ptr [[PTR:%.*]]) #
 ; CHECK:         ret void
 ;
   call void @decl_readonly()
@@ -24,10 +24,10 @@ define void @test_fn_mustprogress(ptr %ptr) mustprogress {
 define void @test_fn_willreturn(ptr %ptr) willreturn {
 ; CHECK: Function Attrs: mustprogress willreturn
 ; CHECK-LABEL: @test_fn_willreturn(
-; CHECK:     call void @decl_readonly() #
-; CHECK:    call void @decl_readnone() #
-; CHECK:     call void @decl_unknown() #
-; CHECK:     call void @decl_argmemonly(ptr [[PTR:%.*]]) #
+; CHECK-NOT:     call void @decl_readonly() #
+; CHECK-NOT:    call void @decl_readnone() #
+; CHECK-NOT:     call void @decl_unknown() #
+; CHECK-NOT:     call void @decl_argmemonly(ptr [[PTR:%.*]]) #
 ; CHECK:         ret void
 ;
   call void @decl_readonly()
@@ -40,8 +40,8 @@ define void @test_fn_willreturn(ptr %ptr) willreturn {
 define void @test_fn_mustprogress_readonly_calls(ptr %ptr) mustprogress {
 ; CHECK: Function Attrs: mustprogress nofree willreturn memory(read)
 ; CHECK-LABEL: @test_fn_mustprogress_readonly_calls(
-; CHECK:     call void @decl_readonly() #
-; CHECK:     call void @decl_readnone() #
+; CHECK-NOT:     call void @decl_readonly() #
+; CHECK-NOT:     call void @decl_readnone() #
 ; CHECK:         ret void
 ;
   call void @decl_readonly()
@@ -52,8 +52,8 @@ define void @test_fn_mustprogress_readonly_calls(ptr %ptr) mustprogress {
 define void @test_fn_mustprogress_readonly_calls_but_stores(ptr %ptr) mustprogress {
 ; CHECK: Function Attrs: mustprogress nofree
 ; CHECK-LABEL: @test_fn_mustprogress_readonly_calls_but_stores(
-; CHECK:     call void @decl_readonly() #
-; CHECK:     call void @decl_readnone() #
+; CHECK-NOT:     call void @decl_readonly() #
+; CHECK-NOT:     call void @decl_readnone() #
 ; CHECK:         store i32 0, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
index c3f9bd08374ad..13d94f8cf6a25 100644
--- a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
+++ b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
@@ -96,8 +96,8 @@ define i32 @maxB(i32 %x, i32 %y) !dbg !34 {
 
 ; OPTIMIZATION_LEVEL_2: define i32 @maxB(i32 %x, i32 %y)
 ; OPTIMIZATION_LEVEL_2-NEXT: entry:
-; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %x, metadata !{{[0-9]+}}, metadata !DIExpression()) #{{[0-9]+}}, !dbg !{{[0-9]+}}
-; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %y, metadata !{{[0-9]+}}, metadata !DIExpression()) #{{[0-9]+}}, !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %x, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %y, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
 ; OPTIMIZATION_LEVEL_2-NEXT: %0 = tail call i32 @maxA(i32 %x, i32 %y) #{{[0-9]+}}, !dbg !{{[0-9]+}}
 ; OPTIMIZATION_LEVEL_2-NEXT: ret i32 %0, !dbg !{{[0-9]+}}
 ; OPTIMIZATION_LEVEL_2-NEXT: }

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll b/llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll
index 10be1f51277a6..4b5f3107466c8 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll
@@ -12,7 +12,7 @@ define i32 @cttz(i32 %n, ptr %p1) {
 ; ALL-LABEL: @cttz(
 ; ALL-NEXT:  entry:
 ; ALL-NEXT:    [[TMP0:%.*]] = shl i32 [[N:%.*]], 1
-; ALL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false) #[[ATTR2:[0-9]+]], !range [[RNG0:![0-9]+]]
+; ALL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false), !range [[RNG0:![0-9]+]]
 ; ALL-NEXT:    [[TMP2:%.*]] = sub nuw nsw i32 32, [[TMP1]]
 ; ALL-NEXT:    [[TMP3:%.*]] = sub nuw nsw i32 75, [[TMP1]]
 ; ALL-NEXT:    store i32 [[TMP3]], ptr [[P1:%.*]], align 4

diff  --git a/llvm/test/Transforms/PhaseOrdering/memset-tail.ll b/llvm/test/Transforms/PhaseOrdering/memset-tail.ll
index b1b74059db7ff..489da5a1ed327 100644
--- a/llvm/test/Transforms/PhaseOrdering/memset-tail.ll
+++ b/llvm/test/Transforms/PhaseOrdering/memset-tail.ll
@@ -8,7 +8,7 @@ define void @PR47852(ptr noundef %d, i32 noundef %c) {
 ; CHECK-NEXT:    br i1 [[CMP_NOT1]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; CHECK:       while.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[C]] to i64
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef align 1 [[D:%.*]], i8 0, i64 [[TMP0]], i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 [[D:%.*]], i8 0, i64 [[TMP0]], i1 false)
 ; CHECK-NEXT:    br label [[WHILE_END]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    ret void


        


More information about the cfe-commits mailing list