[llvm] Add IntrWillReturn to intrinsics (PR #101562)

Kevin McAfee via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 8 10:57:47 PDT 2024


https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/101562

>From ef47e3d558c23fea6597a17be19bd8a6351c9965 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Thu, 1 Aug 2024 13:59:44 -0700
Subject: [PATCH 1/6] Add IntrWillReturn to intrinsics

---
 llvm/include/llvm/IR/IntrinsicsBPF.td         |   6 +-
 llvm/include/llvm/IR/IntrinsicsLoongArch.td   |  24 ++--
 llvm/include/llvm/IR/IntrinsicsMips.td        |  26 ++--
 llvm/include/llvm/IR/IntrinsicsNVVM.td        |   2 +-
 llvm/include/llvm/IR/IntrinsicsSystemZ.td     |   6 +-
 llvm/include/llvm/IR/IntrinsicsVE.td          |   4 +-
 llvm/include/llvm/IR/IntrinsicsVEVL.gen.td    | 128 +++++++++---------
 llvm/include/llvm/IR/IntrinsicsWebAssembly.td |   2 +-
 llvm/test/CodeGen/BPF/sockex2.ll              |   2 +-
 9 files changed, 100 insertions(+), 100 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index c7ec0916f1d1f..725dadeb752d8 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -13,11 +13,11 @@
 // Specialized loads from packet
 let TargetPrefix = "bpf" in {  // All intrinsics start with "llvm.bpf."
   def int_bpf_load_byte : ClangBuiltin<"__builtin_bpf_load_byte">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
   def int_bpf_load_half : ClangBuiltin<"__builtin_bpf_load_half">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
   def int_bpf_load_word : ClangBuiltin<"__builtin_bpf_load_word">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
   def int_bpf_pseudo : ClangBuiltin<"__builtin_bpf_pseudo">,
               Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
   def int_bpf_preserve_field_info : ClangBuiltin<"__builtin_bpf_preserve_field_info">,
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 9002076e7aece..e1661a860c70c 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -621,22 +621,22 @@ foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d",
 // LSX load/store
 def int_loongarch_lsx_vld
   : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldx
   : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty],
-           [IntrReadMem, IntrArgMemOnly]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_loongarch_lsx_vldrepl_b
   : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_h
   : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_w
   : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_d
   : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
 def int_loongarch_lsx_vst
   : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
@@ -1147,22 +1147,22 @@ def int_loongarch_lasx_xvpickve_d_f
 // LASX load/store
 def int_loongarch_lasx_xvld
   : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldx
   : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty],
-           [IntrReadMem, IntrArgMemOnly]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_loongarch_lasx_xvldrepl_b
   : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_h
   : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_w
   : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_d
   : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
 def int_loongarch_lasx_xvst
   : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty],
diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td
index 3056f37b9d877..99807412e0a99 100644
--- a/llvm/include/llvm/IR/IntrinsicsMips.td
+++ b/llvm/include/llvm/IR/IntrinsicsMips.td
@@ -236,7 +236,7 @@ def int_mips_extpdp: ClangBuiltin<"__builtin_mips_extpdp">,
 def int_mips_wrdsp: ClangBuiltin<"__builtin_mips_wrdsp">,
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<1>>]>;
 def int_mips_rddsp: ClangBuiltin<"__builtin_mips_rddsp">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg<ArgIndex<0>>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrWillReturn, IntrReadMem, ImmArg<ArgIndex<0>>]>;
 
 def int_mips_insv: ClangBuiltin<"__builtin_mips_insv">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
@@ -252,22 +252,22 @@ def int_mips_repl_ph: ClangBuiltin<"__builtin_mips_repl_ph">,
   Intrinsic<[mips_v2q15_ty], [llvm_i32_ty], [IntrNoMem]>;
 
 def int_mips_pick_qb: ClangBuiltin<"__builtin_mips_pick_qb">,
-  Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrReadMem]>;
+  Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrWillReturn, IntrReadMem]>;
 def int_mips_pick_ph: ClangBuiltin<"__builtin_mips_pick_ph">,
-  Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrReadMem]>;
+  Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrWillReturn, IntrReadMem]>;
 
 def int_mips_mthlip: ClangBuiltin<"__builtin_mips_mthlip">,
   Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], []>;
 
 def int_mips_bposge32: ClangBuiltin<"__builtin_mips_bposge32">,
-  Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+  Intrinsic<[llvm_i32_ty], [], [IntrWillReturn, IntrReadMem]>;
 
 def int_mips_lbux: ClangBuiltin<"__builtin_mips_lbux">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_lhx: ClangBuiltin<"__builtin_mips_lhx">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_lwx: ClangBuiltin<"__builtin_mips_lwx">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
 //===----------------------------------------------------------------------===//
 // MIPS DSP Rev 2
@@ -1260,23 +1260,23 @@ def int_mips_insve_d : ClangBuiltin<"__builtin_msa_insve_d">,
 
 def int_mips_ld_b : ClangBuiltin<"__builtin_msa_ld_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_h : ClangBuiltin<"__builtin_msa_ld_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_w : ClangBuiltin<"__builtin_msa_ld_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_d : ClangBuiltin<"__builtin_msa_ld_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
 def int_mips_ldr_d : ClangBuiltin<"__builtin_msa_ldr_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ldr_w : ClangBuiltin<"__builtin_msa_ldr_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
 def int_mips_ldi_b : ClangBuiltin<"__builtin_msa_ldi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 1e7fdb53059e2..0fb8b770e4d9c 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4699,7 +4699,7 @@ def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
 class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
   : Intrinsic<Frag.regs,
               !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
-              [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+              [IntrWillReturn, IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
               WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
 
 // WMMA.STORE.D
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index 9f4b905fedc7c..6d510b14dd991 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -231,11 +231,11 @@ let TargetPrefix = "s390" in {
 
   def int_s390_vlbb : ClangBuiltin<"__builtin_s390_vlbb">,
                       Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-                                [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+                                [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
   def int_s390_vll : ClangBuiltin<"__builtin_s390_vll">,
                      Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
-                               [IntrReadMem, IntrArgMemOnly]>;
+                               [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
   def int_s390_vpdi : ClangBuiltin<"__builtin_s390_vpdi">,
                       Intrinsic<[llvm_v2i64_ty],
@@ -400,7 +400,7 @@ let TargetPrefix = "s390" in {
   // Instructions from the Vector Packed Decimal Facility
   def int_s390_vlrl : ClangBuiltin<"__builtin_s390_vlrlr">,
                       Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
-                                [IntrReadMem, IntrArgMemOnly]>;
+                                [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
   def int_s390_vstrl : ClangBuiltin<"__builtin_s390_vstrlr">,
                        Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td
index 15b828b320ea2..eb3436b5fd30e 100644
--- a/llvm/include/llvm/IR/IntrinsicsVE.td
+++ b/llvm/include/llvm/IR/IntrinsicsVE.td
@@ -4,10 +4,10 @@
 let TargetPrefix = "ve" in {
   def int_ve_vl_pack_f32p : ClangBuiltin<"__builtin_ve_vl_pack_f32p">,
                             Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
-                                      [IntrReadMem]>;
+                                      [IntrWillReturn, IntrReadMem]>;
   def int_ve_vl_pack_f32a : ClangBuiltin<"__builtin_ve_vl_pack_f32a">,
                             Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
-                                      [IntrReadMem]>;
+                                      [IntrWillReturn, IntrReadMem]>;
 
   def int_ve_vl_extract_vm512u :
       ClangBuiltin<"__builtin_ve_vl_extract_vm512u">,
diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
index 554dd85572005..7dca4774e99de 100644
--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -1,35 +1,35 @@
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : ClangBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : ClangBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
@@ -1176,38 +1176,38 @@ let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : ClangBuiltin<"__builtin_ve_v
 let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : ClangBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : ClangBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : ClangBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : ClangBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : ClangBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 4d2df1c44ebce..d5020a9a624dc 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -348,7 +348,7 @@ def int_wasm_relaxed_dot_bf16x8_add_f32:
 def int_wasm_loadf16_f32:
   Intrinsic<[llvm_float_ty],
             [llvm_ptr_ty],
-            [IntrReadMem, IntrArgMemOnly],
+            [IntrWillReturn, IntrReadMem, IntrArgMemOnly],
              "", [SDNPMemOperand]>;
 def int_wasm_storef16_f32:
   Intrinsic<[],
diff --git a/llvm/test/CodeGen/BPF/sockex2.ll b/llvm/test/CodeGen/BPF/sockex2.ll
index 4131d9dac31d8..b1264099f64c6 100644
--- a/llvm/test/CodeGen/BPF/sockex2.ll
+++ b/llvm/test/CodeGen/BPF/sockex2.ll
@@ -311,7 +311,7 @@ flow_dissector.exit.thread:                       ; preds = %86, %12, %196, %199
 ; CHECK-LABEL: bpf_prog2:
 ; CHECK: r0 = *(u16 *)skb[12] # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
 ; CHECK: r0 = *(u16 *)skb[16] # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
-; CHECK: implicit-def: $r8
+; CHECK: implicit-def: $r7
 ; CHECK: r1 =
 ; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
 ; CHECK: call 2 # encoding: [0x85,0x00,0x00,0x00,0x02,0x00,0x00,0x00]

>From 725139a231c2acbb704fa893f1bb16d421dd0200 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Tue, 6 Aug 2024 12:35:57 -0700
Subject: [PATCH 2/6] Add DCE test for modified intrinsics

---
 .../Transforms/DCE/intrinsics-willreturn.ll   | 940 ++++++++++++++++++
 1 file changed, 940 insertions(+)
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-willreturn.ll

diff --git a/llvm/test/Transforms/DCE/intrinsics-willreturn.ll b/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
new file mode 100644
index 0000000000000..5d66216db79cd
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+
+; BPF
+
+declare i64 @llvm.bpf.load.half(ptr, i64)
+declare i64 @llvm.bpf.load.word(ptr, i64)
+declare i64 @llvm.bpf.load.byte(ptr, i64)
+
+define void @test_bpf_load_half(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_half(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.half(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_word(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_word(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.word(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_byte(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_byte(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.byte(ptr %a, i64 %b)
+  ret void
+}
+
+; LoongArch
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(ptr, i32)
+declare <16 x i8> @llvm.loongarch.lsx.vldx(ptr, i64)
+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr, i32)
+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr, i32)
+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr, i32)
+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr, i32)
+
+declare <32 x i8> @llvm.loongarch.lasx.xvld(ptr, i32)
+declare <32 x i8> @llvm.loongarch.lasx.xvldx(ptr, i64)
+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr, i32)
+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr, i32)
+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr, i32)
+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr, i32)
+
+define void @test_loongarch_lsx_vld(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vld(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vld(ptr %a, i32 8)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldx(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldx(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vldx(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_b(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_b(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr %a, i32 12)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_h(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_h(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr %a, i32 12)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_w(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_w(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_d(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_d(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvld(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvld(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvld(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldx(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldx(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_b(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_b(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_h(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_h(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_w(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_w(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_d(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_d(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr %a, i32 4)
+  ret void
+}
+
+; MIPS
+
+declare i32 @llvm.mips.rddsp(i32)
+declare <4 x i8> @llvm.mips.pick.qb(<4 x i8>, <4 x i8>)
+declare <2 x i16> @llvm.mips.pick.ph(<2 x i16>, <2 x i16>)
+declare i32 @llvm.mips.bposge32()
+declare i32 @llvm.mips.lbux(ptr, i32)
+declare i32 @llvm.mips.lhx(ptr, i32)
+declare i32 @llvm.mips.lwx(ptr, i32)
+declare <16 x i8> @llvm.mips.ld.b(ptr, i32)
+declare <8 x i16> @llvm.mips.ld.h(ptr, i32)
+declare <4 x i32> @llvm.mips.ld.w(ptr, i32)
+declare <2 x i64> @llvm.mips.ld.d(ptr, i32)
+declare <2 x i64> @llvm.mips.ldr.d(ptr, i32)
+declare <4 x i32> @llvm.mips.ldr.w(ptr, i32)
+
+define void @test_mips_rddsp() {
+; CHECK-LABEL: define void @test_mips_rddsp() {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.rddsp(i32 4)
+  ret void
+}
+
+define void @test_llvm_mips_pick_qb(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: define void @test_llvm_mips_pick_qb(
+; CHECK-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %a, <4 x i8> %b)
+  ret void
+}
+
+define void @test_llvm_mips_pick_ph(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: define void @test_llvm_mips_pick_ph(
+; CHECK-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %a, <2 x i16> %b)
+  ret void
+}
+
+define void @test_llvm_mips_bposge32() {
+; CHECK-LABEL: define void @test_llvm_mips_bposge32() {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.bposge32()
+  ret void
+}
+
+define void @test_llvm_mips_lbux(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lbux(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lbux(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_lhx(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lhx(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lhx(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_lwx(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lwx(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lwx(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_b(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_b(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.mips.ld.b(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_h(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_h(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i16> @llvm.mips.ld.h(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_w(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_w(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.mips.ld.w(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_d(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_d(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.mips.ld.d(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ldr_d(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ldr_d(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.mips.ldr.d(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ldr_w(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ldr_w(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.mips.ldr.w(ptr %a, i32 %b)
+  ret void
+}
+
+; NVVM
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+; SystemZ
+
+declare <16 x i8> @llvm.s390.vlbb(ptr, i32)
+declare <16 x i8> @llvm.s390.vll(i32, ptr)
+declare <16 x i8> @llvm.s390.vlrl(i32, ptr)
+
+define void @test_llvm_s390.vlbb(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_s390.vlbb(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vlbb(ptr %a, i32 8)
+  ret void
+}
+
+define void @test_llvm_s390.vll(i32 %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_s390.vll(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vll(i32 %a, ptr %b)
+  ret void
+}
+
+define void @test_llvm_s390.vlrl(i32 %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_s390.vlrl(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vlrl(i32 %a, ptr %b)
+  ret void
+}
+
+; VE
+
+declare i64 @llvm.ve.vl.pack.f32p(ptr, ptr)
+declare i64 @llvm.ve.vl.pack.f32a(ptr)
+
+define void @test_llvm_ve_vl_pack_f32p(ptr %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32p(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.ve.vl.pack.f32p(ptr %a, ptr %b)
+  ret void
+}
+
+define void @test_llvm_ve_vl_pack_f32a(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32a(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.ve.vl.pack.f32a(ptr %a)
+  ret void
+}
+
+; WebAssembly
+
+declare float @llvm.wasm.loadf16.f32(ptr)
+
+define void @test_llvm_wasm_loadf16_f32(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_wasm_loadf16_f32(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call float @llvm.wasm.loadf16.f32(ptr %a)
+  ret void
+}
+
+
+

>From 1155f369999bcb96d497efa9e264d46790531ec6 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Tue, 6 Aug 2024 15:46:24 -0700
Subject: [PATCH 3/6] Add missing tests

---
 .../Transforms/DCE/intrinsics-willreturn.ll   | 708 +++++++++++++++++-
 1 file changed, 707 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/DCE/intrinsics-willreturn.ll b/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
index 5d66216db79cd..0c3e15222cb82 100644
--- a/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
+++ b/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
@@ -903,7 +903,6 @@ define void @test_llvm_s390.vlrl(i32 %a, ptr %b) {
 ; VE
 
 declare i64 @llvm.ve.vl.pack.f32p(ptr, ptr)
-declare i64 @llvm.ve.vl.pack.f32a(ptr)
 
 define void @test_llvm_ve_vl_pack_f32p(ptr %a, ptr %b) {
 ; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32p(
@@ -914,6 +913,9 @@ define void @test_llvm_ve_vl_pack_f32p(ptr %a, ptr %b) {
   ret void
 }
 
+
+declare i64 @llvm.ve.vl.pack.f32a(ptr)
+
 define void @test_llvm_ve_vl_pack_f32a(ptr %a) {
 ; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32a(
 ; CHECK-SAME: ptr [[A:%.*]]) {
@@ -923,6 +925,710 @@ define void @test_llvm_ve_vl_pack_f32a(ptr %a) {
   ret void
 }
 
+declare <256 x double> @llvm.ve.vl.vld.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vld_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vld_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldu_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldu_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldunc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldunc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldunc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldunc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldunc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldunc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldunc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldunc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlsx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlsx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlsxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlsxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlzx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlzx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlzxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlzxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2d.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vld2d_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2d_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2d.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2d.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vld2d_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2d_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2d.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vld2dnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2dnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vld2dnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2dnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2d.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldu2d_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2d_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldu2d_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2d_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldu2dnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2dnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldu2dnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2dnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dsx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dsx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dsxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dsxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dzx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dzx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dzxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dzxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
 ; WebAssembly
 
 declare float @llvm.wasm.loadf16.f32(ptr)

>From 663a5189a755d2ece1607469f98e8dcaeb892902 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Wed, 7 Aug 2024 09:34:54 -0700
Subject: [PATCH 4/6] Use DefaultAttrsIntrinsic instead for most intrinsics

---
 llvm/include/llvm/IR/IntrinsicsBPF.td         |   6 +-
 llvm/include/llvm/IR/IntrinsicsMips.td        |  38 +++---
 llvm/include/llvm/IR/IntrinsicsSystemZ.td     |  12 +-
 llvm/include/llvm/IR/IntrinsicsVE.td          |   8 +-
 llvm/include/llvm/IR/IntrinsicsVEVL.gen.td    | 128 +++++++++---------
 llvm/include/llvm/IR/IntrinsicsWebAssembly.td |   4 +-
 6 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index 725dadeb752d8..d02eaa6d0dff6 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -13,11 +13,11 @@
 // Specialized loads from packet
 let TargetPrefix = "bpf" in {  // All intrinsics start with "llvm.bpf."
   def int_bpf_load_byte : ClangBuiltin<"__builtin_bpf_load_byte">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
+              DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
   def int_bpf_load_half : ClangBuiltin<"__builtin_bpf_load_half">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
+              DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
   def int_bpf_load_word : ClangBuiltin<"__builtin_bpf_load_word">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
+              DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
   def int_bpf_pseudo : ClangBuiltin<"__builtin_bpf_pseudo">,
               Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
   def int_bpf_preserve_field_info : ClangBuiltin<"__builtin_bpf_preserve_field_info">,
diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td
index 99807412e0a99..8d12786d52e12 100644
--- a/llvm/include/llvm/IR/IntrinsicsMips.td
+++ b/llvm/include/llvm/IR/IntrinsicsMips.td
@@ -236,7 +236,7 @@ def int_mips_extpdp: ClangBuiltin<"__builtin_mips_extpdp">,
 def int_mips_wrdsp: ClangBuiltin<"__builtin_mips_wrdsp">,
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<1>>]>;
 def int_mips_rddsp: ClangBuiltin<"__builtin_mips_rddsp">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrWillReturn, IntrReadMem, ImmArg<ArgIndex<0>>]>;
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg<ArgIndex<0>>]>;
 
 def int_mips_insv: ClangBuiltin<"__builtin_mips_insv">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
@@ -252,22 +252,22 @@ def int_mips_repl_ph: ClangBuiltin<"__builtin_mips_repl_ph">,
   Intrinsic<[mips_v2q15_ty], [llvm_i32_ty], [IntrNoMem]>;
 
 def int_mips_pick_qb: ClangBuiltin<"__builtin_mips_pick_qb">,
-  Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrWillReturn, IntrReadMem]>;
+  DefaultAttrsIntrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrReadMem]>;
 def int_mips_pick_ph: ClangBuiltin<"__builtin_mips_pick_ph">,
-  Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrWillReturn, IntrReadMem]>;
+  DefaultAttrsIntrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrReadMem]>;
 
 def int_mips_mthlip: ClangBuiltin<"__builtin_mips_mthlip">,
   Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], []>;
 
 def int_mips_bposge32: ClangBuiltin<"__builtin_mips_bposge32">,
-  Intrinsic<[llvm_i32_ty], [], [IntrWillReturn, IntrReadMem]>;
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
 
 def int_mips_lbux: ClangBuiltin<"__builtin_mips_lbux">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
 def int_mips_lhx: ClangBuiltin<"__builtin_mips_lhx">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
 def int_mips_lwx: ClangBuiltin<"__builtin_mips_lwx">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
 
 //===----------------------------------------------------------------------===//
 // MIPS DSP Rev 2
@@ -1259,24 +1259,24 @@ def int_mips_insve_d : ClangBuiltin<"__builtin_msa_insve_d">,
             [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 
 def int_mips_ld_b : ClangBuiltin<"__builtin_msa_ld_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_h : ClangBuiltin<"__builtin_msa_ld_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_w : ClangBuiltin<"__builtin_msa_ld_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_d : ClangBuiltin<"__builtin_msa_ld_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
 
 def int_mips_ldr_d : ClangBuiltin<"__builtin_msa_ldr_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ldr_w : ClangBuiltin<"__builtin_msa_ldr_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
 
 def int_mips_ldi_b : ClangBuiltin<"__builtin_msa_ldi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index 6d510b14dd991..4f925979cf856 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -230,12 +230,12 @@ let TargetPrefix = "s390" in {
                                 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 
   def int_s390_vlbb : ClangBuiltin<"__builtin_s390_vlbb">,
-                      Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-                                [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+                      DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
   def int_s390_vll : ClangBuiltin<"__builtin_s390_vll">,
-                     Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
-                               [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+                     DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
+                               [IntrReadMem, IntrArgMemOnly]>;
 
   def int_s390_vpdi : ClangBuiltin<"__builtin_s390_vpdi">,
                       Intrinsic<[llvm_v2i64_ty],
@@ -399,8 +399,8 @@ let TargetPrefix = "s390" in {
 
   // Instructions from the Vector Packed Decimal Facility
   def int_s390_vlrl : ClangBuiltin<"__builtin_s390_vlrlr">,
-                      Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
-                                [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+                      DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
+                                [IntrReadMem, IntrArgMemOnly]>;
 
   def int_s390_vstrl : ClangBuiltin<"__builtin_s390_vstrlr">,
                        Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td
index eb3436b5fd30e..633bc6603bf23 100644
--- a/llvm/include/llvm/IR/IntrinsicsVE.td
+++ b/llvm/include/llvm/IR/IntrinsicsVE.td
@@ -3,11 +3,11 @@
 // VEL Intrinsic instructions.
 let TargetPrefix = "ve" in {
   def int_ve_vl_pack_f32p : ClangBuiltin<"__builtin_ve_vl_pack_f32p">,
-                            Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
-                                      [IntrWillReturn, IntrReadMem]>;
+                            DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
+                                      [IntrReadMem]>;
   def int_ve_vl_pack_f32a : ClangBuiltin<"__builtin_ve_vl_pack_f32a">,
-                            Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
-                                      [IntrWillReturn, IntrReadMem]>;
+                            DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty],
+                                      [IntrReadMem]>;
 
   def int_ve_vl_extract_vm512u :
       ClangBuiltin<"__builtin_ve_vl_extract_vm512u">,
diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
index 7dca4774e99de..0c44dfb907654 100644
--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -1,35 +1,35 @@
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : ClangBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : ClangBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
@@ -1176,38 +1176,38 @@ let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : ClangBuiltin<"__builtin_ve_v
 let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : ClangBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : ClangBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : ClangBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, DefaultAttrsIntrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : ClangBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : ClangBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index d5020a9a624dc..d03e532964f28 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -346,9 +346,9 @@ def int_wasm_relaxed_dot_bf16x8_add_f32:
 // TODO: Replace these intrinsic with normal ISel patterns once the XXX
 // instructions are merged to the proposal.
 def int_wasm_loadf16_f32:
-  Intrinsic<[llvm_float_ty],
+  DefaultAttrsIntrinsic<[llvm_float_ty],
             [llvm_ptr_ty],
-            [IntrWillReturn, IntrReadMem, IntrArgMemOnly],
+            [IntrReadMem, IntrArgMemOnly],
              "", [SDNPMemOperand]>;
 def int_wasm_storef16_f32:
   Intrinsic<[],

>From 2546ab16a9e1a357a3f2cccc8da007398c743827 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Wed, 7 Aug 2024 10:25:47 -0700
Subject: [PATCH 5/6] Split tests into per-target files and consolidate
 existing nvvm test

---
 llvm/test/Transforms/DCE/intrinsics-bpf.ll    |   33 +
 .../Transforms/DCE/intrinsics-loongarch.ll    |  124 ++
 llvm/test/Transforms/DCE/intrinsics-mips.ll   |  131 ++
 llvm/test/Transforms/DCE/intrinsics-nvvm.ll   |  759 ++++++++
 .../test/Transforms/DCE/intrinsics-systemz.ll |   33 +
 llvm/test/Transforms/DCE/intrinsics-ve.ll     |  729 ++++++++
 llvm/test/Transforms/DCE/intrinsics-wasm.ll   |   13 +
 .../Transforms/DCE/intrinsics-willreturn.ll   | 1646 -----------------
 .../Transforms/DCE/nvvm-ldu-ldg-willreturn.ll |  187 --
 9 files changed, 1822 insertions(+), 1833 deletions(-)
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-bpf.ll
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-loongarch.ll
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-mips.ll
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-nvvm.ll
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-systemz.ll
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-ve.ll
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-wasm.ll
 delete mode 100644 llvm/test/Transforms/DCE/intrinsics-willreturn.ll
 delete mode 100644 llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll

diff --git a/llvm/test/Transforms/DCE/intrinsics-bpf.ll b/llvm/test/Transforms/DCE/intrinsics-bpf.ll
new file mode 100644
index 0000000000000..135588ba21cbb
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-bpf.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare i64 @llvm.bpf.load.half(ptr, i64)
+declare i64 @llvm.bpf.load.word(ptr, i64)
+declare i64 @llvm.bpf.load.byte(ptr, i64)
+
+define void @test_bpf_load_half(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_half(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.half(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_word(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_word(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.word(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_byte(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_byte(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.byte(ptr %a, i64 %b)
+  ret void
+}
diff --git a/llvm/test/Transforms/DCE/intrinsics-loongarch.ll b/llvm/test/Transforms/DCE/intrinsics-loongarch.ll
new file mode 100644
index 0000000000000..665069091a48d
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-loongarch.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(ptr, i32)
+declare <16 x i8> @llvm.loongarch.lsx.vldx(ptr, i64)
+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr, i32)
+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr, i32)
+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr, i32)
+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr, i32)
+
+declare <32 x i8> @llvm.loongarch.lasx.xvld(ptr, i32)
+declare <32 x i8> @llvm.loongarch.lasx.xvldx(ptr, i64)
+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr, i32)
+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr, i32)
+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr, i32)
+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr, i32)
+
+define void @test_loongarch_lsx_vld(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vld(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vld(ptr %a, i32 8)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldx(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldx(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vldx(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_b(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_b(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr %a, i32 12)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_h(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_h(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr %a, i32 12)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_w(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_w(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_d(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_d(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvld(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvld(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvld(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldx(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldx(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_b(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_b(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_h(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_h(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_w(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_w(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_d(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_d(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr %a, i32 4)
+  ret void
+}
diff --git a/llvm/test/Transforms/DCE/intrinsics-mips.ll b/llvm/test/Transforms/DCE/intrinsics-mips.ll
new file mode 100644
index 0000000000000..22d709e21e061
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-mips.ll
@@ -0,0 +1,131 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare i32 @llvm.mips.rddsp(i32)
+declare <4 x i8> @llvm.mips.pick.qb(<4 x i8>, <4 x i8>)
+declare <2 x i16> @llvm.mips.pick.ph(<2 x i16>, <2 x i16>)
+declare i32 @llvm.mips.bposge32()
+declare i32 @llvm.mips.lbux(ptr, i32)
+declare i32 @llvm.mips.lhx(ptr, i32)
+declare i32 @llvm.mips.lwx(ptr, i32)
+declare <16 x i8> @llvm.mips.ld.b(ptr, i32)
+declare <8 x i16> @llvm.mips.ld.h(ptr, i32)
+declare <4 x i32> @llvm.mips.ld.w(ptr, i32)
+declare <2 x i64> @llvm.mips.ld.d(ptr, i32)
+declare <2 x i64> @llvm.mips.ldr.d(ptr, i32)
+declare <4 x i32> @llvm.mips.ldr.w(ptr, i32)
+
+define void @test_mips_rddsp() {
+; CHECK-LABEL: define void @test_mips_rddsp() {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.rddsp(i32 4)
+  ret void
+}
+
+define void @test_llvm_mips_pick_qb(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: define void @test_llvm_mips_pick_qb(
+; CHECK-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %a, <4 x i8> %b)
+  ret void
+}
+
+define void @test_llvm_mips_pick_ph(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: define void @test_llvm_mips_pick_ph(
+; CHECK-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %a, <2 x i16> %b)
+  ret void
+}
+
+define void @test_llvm_mips_bposge32() {
+; CHECK-LABEL: define void @test_llvm_mips_bposge32() {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.bposge32()
+  ret void
+}
+
+define void @test_llvm_mips_lbux(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lbux(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lbux(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_lhx(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lhx(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lhx(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_lwx(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lwx(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lwx(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_b(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_b(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.mips.ld.b(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_h(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_h(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i16> @llvm.mips.ld.h(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_w(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_w(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.mips.ld.w(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_d(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_d(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.mips.ld.d(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ldr_d(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ldr_d(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.mips.ldr.d(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ldr_w(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ldr_w(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.mips.ldr.w(ptr %a, i32 %b)
+  ret void
+}
diff --git a/llvm/test/Transforms/DCE/intrinsics-nvvm.ll b/llvm/test/Transforms/DCE/intrinsics-nvvm.ll
new file mode 100644
index 0000000000000..f0b5c2a5920f7
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-nvvm.ll
@@ -0,0 +1,759 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i8_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_p_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i8_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_p_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
diff --git a/llvm/test/Transforms/DCE/intrinsics-systemz.ll b/llvm/test/Transforms/DCE/intrinsics-systemz.ll
new file mode 100644
index 0000000000000..d54d8d02cbb78
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-systemz.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare <16 x i8> @llvm.s390.vlbb(ptr, i32)
+declare <16 x i8> @llvm.s390.vll(i32, ptr)
+declare <16 x i8> @llvm.s390.vlrl(i32, ptr)
+
+define void @test_llvm_s390.vlbb(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_s390.vlbb(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vlbb(ptr %a, i32 8)
+  ret void
+}
+
+define void @test_llvm_s390.vll(i32 %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_s390.vll(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vll(i32 %a, ptr %b)
+  ret void
+}
+
+define void @test_llvm_s390.vlrl(i32 %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_s390.vlrl(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vlrl(i32 %a, ptr %b)
+  ret void
+}
diff --git a/llvm/test/Transforms/DCE/intrinsics-ve.ll b/llvm/test/Transforms/DCE/intrinsics-ve.ll
new file mode 100644
index 0000000000000..5c6d207587f8e
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-ve.ll
@@ -0,0 +1,729 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare i64 @llvm.ve.vl.pack.f32p(ptr, ptr)
+
+define void @test_llvm_ve_vl_pack_f32p(ptr %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32p(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.ve.vl.pack.f32p(ptr %a, ptr %b)
+  ret void
+}
+
+
+declare i64 @llvm.ve.vl.pack.f32a(ptr)
+
+define void @test_llvm_ve_vl_pack_f32a(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32a(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.ve.vl.pack.f32a(ptr %a)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vld_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vld_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldu_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldu_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldunc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldunc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldunc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldunc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldunc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldunc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldunc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldunc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlsx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlsx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlsxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlsxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlzx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlzx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldlzxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldlzxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2d.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vld2d_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2d_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2d.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2d.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vld2d_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2d_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2d.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vld2dnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2dnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vld2dnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vld2dnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2d.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldu2d_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2d_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldu2d_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2d_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldu2dnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2dnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldu2dnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2dnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dsx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dsx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dsxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dsxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dzx_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzx_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dzx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzx_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64, ptr, i32)
+
+define void @test_llvm_ve_vl_vldl2dzxnc_vssl(i64 %a, ptr %b, i32 %c) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzxnc_vssl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 %a, ptr %b, i32 %c)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64, ptr, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vldl2dzxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzxnc_vssvl(
+; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgt.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgt_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgt.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtu.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtu_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtunc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtunc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsx.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsx_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlsxnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzx.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzx_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssl(<256 x double>, i64, i64, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssml(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
+  ret void
+}
+
+declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
+
+define void @test_llvm_ve_vl_vgtlzxnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssmvl(
+; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
+  ret void
+}
diff --git a/llvm/test/Transforms/DCE/intrinsics-wasm.ll b/llvm/test/Transforms/DCE/intrinsics-wasm.ll
new file mode 100644
index 0000000000000..8d6a60965a46d
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-wasm.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare float @llvm.wasm.loadf16.f32(ptr)
+
+define void @test_llvm_wasm_loadf16_f32(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_wasm_loadf16_f32(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call float @llvm.wasm.loadf16.f32(ptr %a)
+  ret void
+}
diff --git a/llvm/test/Transforms/DCE/intrinsics-willreturn.ll b/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
deleted file mode 100644
index 0c3e15222cb82..0000000000000
--- a/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
+++ /dev/null
@@ -1,1646 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S < %s -passes=dce  | FileCheck %s
-
-
-; BPF
-
-declare i64 @llvm.bpf.load.half(ptr, i64)
-declare i64 @llvm.bpf.load.word(ptr, i64)
-declare i64 @llvm.bpf.load.byte(ptr, i64)
-
-define void @test_bpf_load_half(ptr %a, i64 %b) {
-; CHECK-LABEL: define void @test_bpf_load_half(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i64 @llvm.bpf.load.half(ptr %a, i64 %b)
-  ret void
-}
-
-define void @test_bpf_load_word(ptr %a, i64 %b) {
-; CHECK-LABEL: define void @test_bpf_load_word(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i64 @llvm.bpf.load.word(ptr %a, i64 %b)
-  ret void
-}
-
-define void @test_bpf_load_byte(ptr %a, i64 %b) {
-; CHECK-LABEL: define void @test_bpf_load_byte(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i64 @llvm.bpf.load.byte(ptr %a, i64 %b)
-  ret void
-}
-
-; LoongArch
-
-declare <16 x i8> @llvm.loongarch.lsx.vld(ptr, i32)
-declare <16 x i8> @llvm.loongarch.lsx.vldx(ptr, i64)
-declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr, i32)
-declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr, i32)
-declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr, i32)
-declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr, i32)
-
-declare <32 x i8> @llvm.loongarch.lasx.xvld(ptr, i32)
-declare <32 x i8> @llvm.loongarch.lasx.xvldx(ptr, i64)
-declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr, i32)
-declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr, i32)
-declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr, i32)
-declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr, i32)
-
-define void @test_loongarch_lsx_vld(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lsx_vld(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i8> @llvm.loongarch.lsx.vld(ptr %a, i32 8)
-  ret void
-}
-
-define void @test_loongarch_lsx_vldx(ptr %a, i64 %b) {
-; CHECK-LABEL: define void @test_loongarch_lsx_vldx(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i8> @llvm.loongarch.lsx.vldx(ptr %a, i64 %b)
-  ret void
-}
-
-define void @test_loongarch_lsx_vldrepl_b(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_b(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr %a, i32 12)
-  ret void
-}
-
-define void @test_loongarch_lsx_vldrepl_h(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_h(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr %a, i32 12)
-  ret void
-}
-
-define void @test_loongarch_lsx_vldrepl_w(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_w(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr %a, i32 4)
-  ret void
-}
-
-define void @test_loongarch_lsx_vldrepl_d(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_d(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr %a, i32 4)
-  ret void
-}
-
-define void @test_loongarch_lasx_xvld(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lasx_xvld(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <32 x i8> @llvm.loongarch.lasx.xvld(ptr %a, i32 4)
-  ret void
-}
-
-define void @test_loongarch_lasx_xvldx(ptr %a, i64 %b) {
-; CHECK-LABEL: define void @test_loongarch_lasx_xvldx(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr %a, i64 %b)
-  ret void
-}
-
-define void @test_loongarch_lasx_xvldrepl_b(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_b(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr %a, i32 4)
-  ret void
-}
-
-define void @test_loongarch_lasx_xvldrepl_h(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_h(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr %a, i32 4)
-  ret void
-}
-
-define void @test_loongarch_lasx_xvldrepl_w(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_w(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr %a, i32 4)
-  ret void
-}
-
-define void @test_loongarch_lasx_xvldrepl_d(ptr %a) {
-; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_d(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr %a, i32 4)
-  ret void
-}
-
-; MIPS
-
-declare i32 @llvm.mips.rddsp(i32)
-declare <4 x i8> @llvm.mips.pick.qb(<4 x i8>, <4 x i8>)
-declare <2 x i16> @llvm.mips.pick.ph(<2 x i16>, <2 x i16>)
-declare i32 @llvm.mips.bposge32()
-declare i32 @llvm.mips.lbux(ptr, i32)
-declare i32 @llvm.mips.lhx(ptr, i32)
-declare i32 @llvm.mips.lwx(ptr, i32)
-declare <16 x i8> @llvm.mips.ld.b(ptr, i32)
-declare <8 x i16> @llvm.mips.ld.h(ptr, i32)
-declare <4 x i32> @llvm.mips.ld.w(ptr, i32)
-declare <2 x i64> @llvm.mips.ld.d(ptr, i32)
-declare <2 x i64> @llvm.mips.ldr.d(ptr, i32)
-declare <4 x i32> @llvm.mips.ldr.w(ptr, i32)
-
-define void @test_mips_rddsp() {
-; CHECK-LABEL: define void @test_mips_rddsp() {
-; CHECK-NEXT:    ret void
-;
-  %v = call i32 @llvm.mips.rddsp(i32 4)
-  ret void
-}
-
-define void @test_llvm_mips_pick_qb(<4 x i8> %a, <4 x i8> %b) {
-; CHECK-LABEL: define void @test_llvm_mips_pick_qb(
-; CHECK-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %a, <4 x i8> %b)
-  ret void
-}
-
-define void @test_llvm_mips_pick_ph(<2 x i16> %a, <2 x i16> %b) {
-; CHECK-LABEL: define void @test_llvm_mips_pick_ph(
-; CHECK-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %a, <2 x i16> %b)
-  ret void
-}
-
-define void @test_llvm_mips_bposge32() {
-; CHECK-LABEL: define void @test_llvm_mips_bposge32() {
-; CHECK-NEXT:    ret void
-;
-  %v = call i32 @llvm.mips.bposge32()
-  ret void
-}
-
-define void @test_llvm_mips_lbux(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_lbux(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i32 @llvm.mips.lbux(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_lhx(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_lhx(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i32 @llvm.mips.lhx(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_lwx(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_lwx(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i32 @llvm.mips.lwx(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_ld_b(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_ld_b(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i8> @llvm.mips.ld.b(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_ld_h(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_ld_h(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <8 x i16> @llvm.mips.ld.h(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_ld_w(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_ld_w(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <4 x i32> @llvm.mips.ld.w(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_ld_d(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_ld_d(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <2 x i64> @llvm.mips.ld.d(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_ldr_d(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_ldr_d(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <2 x i64> @llvm.mips.ldr.d(ptr %a, i32 %b)
-  ret void
-}
-
-define void @test_llvm_mips_ldr_w(ptr %a, i32 %b) {
-; CHECK-LABEL: define void @test_llvm_mips_ldr_w(
-; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <4 x i32> @llvm.mips.ldr.w(ptr %a, i32 %b)
-  ret void
-}
-
-; NVVM
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(i8 addrspace(0)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(
-; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(i8 addrspace(3)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(
-; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(i8 addrspace(1)* %src ) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
-  ret void;
-}
-
-
-declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
-
-define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
-; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(
-; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
-  ret void;
-}
-
-; SystemZ
-
-declare <16 x i8> @llvm.s390.vlbb(ptr, i32)
-declare <16 x i8> @llvm.s390.vll(i32, ptr)
-declare <16 x i8> @llvm.s390.vlrl(i32, ptr)
-
-define void @test_llvm_s390.vlbb(ptr %a) {
-; CHECK-LABEL: define void @test_llvm_s390.vlbb(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i8> @llvm.s390.vlbb(ptr %a, i32 8)
-  ret void
-}
-
-define void @test_llvm_s390.vll(i32 %a, ptr %b) {
-; CHECK-LABEL: define void @test_llvm_s390.vll(
-; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i8> @llvm.s390.vll(i32 %a, ptr %b)
-  ret void
-}
-
-define void @test_llvm_s390.vlrl(i32 %a, ptr %b) {
-; CHECK-LABEL: define void @test_llvm_s390.vlrl(
-; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <16 x i8> @llvm.s390.vlrl(i32 %a, ptr %b)
-  ret void
-}
-
-; VE
-
-declare i64 @llvm.ve.vl.pack.f32p(ptr, ptr)
-
-define void @test_llvm_ve_vl_pack_f32p(ptr %a, ptr %b) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32p(
-; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i64 @llvm.ve.vl.pack.f32p(ptr %a, ptr %b)
-  ret void
-}
-
-
-declare i64 @llvm.ve.vl.pack.f32a(ptr)
-
-define void @test_llvm_ve_vl_pack_f32a(ptr %a) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32a(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call i64 @llvm.ve.vl.pack.f32a(ptr %a)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vld.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vld_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vld_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vld.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vld.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vld_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vld_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vld.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldnc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldnc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldnc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldnc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldnc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldu.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldu_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldu_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldu.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldu.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldu_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldu_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldu.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldunc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldunc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldunc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldunc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldunc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldunc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldunc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldunc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlsx.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldlsx_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsx_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlsx.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldlsx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsx_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlsx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldlsxnc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsxnc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlsxnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldlsxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlsxnc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlsxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlzx.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldlzx_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzx_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlzx.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldlzx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzx_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlzx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldlzxnc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzxnc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlzxnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldlzxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldlzxnc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldlzxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vld2d.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vld2d_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vld2d_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vld2d.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vld2d.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vld2d_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vld2d_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vld2d.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vld2dnc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vld2dnc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vld2dnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vld2dnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vld2dnc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vld2dnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldu2d.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldu2d_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2d_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldu2d.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldu2d_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2d_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldu2d.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldu2dnc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2dnc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldu2dnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldu2dnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldu2dnc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldu2dnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldl2dsx_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsx_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dsx.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldl2dsx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsx_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dsx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldl2dsxnc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsxnc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldl2dsxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dsxnc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dzx.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldl2dzx_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzx_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dsxnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldl2dzx_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzx_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dzx.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64, ptr, i32)
-
-define void @test_llvm_ve_vl_vldl2dzxnc_vssl(i64 %a, ptr %b, i32 %c) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzxnc_vssl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], i32 [[C:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dzxnc.vssl(i64 %a, ptr %b, i32 %c)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64, ptr, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vldl2dzxnc_vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vldl2dzxnc_vssvl(
-; CHECK-SAME: i64 [[A:%.*]], ptr [[B:%.*]], <256 x double> [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vldl2dzxnc.vssvl(i64 %a, ptr %b, <256 x double> %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgt.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgt_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgt.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgt.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgt_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgt.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgt.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgt_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgt.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgt.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgt_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgt_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgt.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtnc.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgtnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgtnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtnc_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtu.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgtu_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtu.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtu_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtu.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgtu_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtu.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtu_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtu_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtu.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtunc.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgtunc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtunc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtunc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtunc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgtunc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtunc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtunc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtunc_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtunc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsx.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgtlsx_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsx.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlsx_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsx.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgtlsx_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsx.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlsx_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsx_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsx.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgtlsxnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlsxnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgtlsxnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlsxnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlsxnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlsxnc_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlsxnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzx.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgtlzx_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzx.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlzx_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzx.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgtlzx_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzx.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlzx_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzx_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzx.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssl(<256 x double>, i64, i64, i32)
-
-define void @test_llvm_ve_vl_vgtlzxnc_vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssl(<256 x double> %a, i64 %b, i64 %c, i32 %d)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssvl(<256 x double>, i64, i64, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlzxnc_vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x double> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssvl(<256 x double> %a, i64 %b, i64 %c, <256 x double> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssml(<256 x double>, i64, i64, <256 x i1>, i32)
-
-define void @test_llvm_ve_vl_vgtlzxnc_vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssml(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], i32 [[E:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssml(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, i32 %e)
-  ret void
-}
-
-declare <256 x double> @llvm.ve.vl.vgtlzxnc.vvssmvl(<256 x double>, i64, i64, <256 x i1>, <256 x double>, i32)
-
-define void @test_llvm_ve_vl_vgtlzxnc_vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f) {
-; CHECK-LABEL: define void @test_llvm_ve_vl_vgtlzxnc_vvssmvl(
-; CHECK-SAME: <256 x double> [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], <256 x i1> [[D:%.*]], <256 x double> [[E:%.*]], i32 [[F:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call <256 x double> @llvm.ve.vl.vgtlzxnc.vvssmvl(<256 x double> %a, i64 %b, i64 %c, <256 x i1> %d, <256 x double> %e, i32 %f)
-  ret void
-}
-
-; WebAssembly
-
-declare float @llvm.wasm.loadf16.f32(ptr)
-
-define void @test_llvm_wasm_loadf16_f32(ptr %a) {
-; CHECK-LABEL: define void @test_llvm_wasm_loadf16_f32(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %v = call float @llvm.wasm.loadf16.f32(ptr %a)
-  ret void
-}
-
-
-
diff --git a/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
deleted file mode 100644
index 64a023ef45137..0000000000000
--- a/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
+++ /dev/null
@@ -1,187 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S < %s -passes=dce  | FileCheck %s
-
-; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing
-; them from being eliminated at IR level when dead.
-
-declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
-declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
-declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
-declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
-declare ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
-declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
-declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
-declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
-declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
-
-declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
-declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
-declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
-declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
-declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
-declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
-declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
-declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
-declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
-
-define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_i8_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_i16_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_i32_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_i64_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_p_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_f32_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_f64_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_f16_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldu_v2f16_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_i8_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_i16_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_i32_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_i64_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_p_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_f32_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}
-
-define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_f64_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
-  ret void
-}
-
-define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_f16_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
-  ret void
-}
-
-define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: define void @test_ldg_v2f16_dead(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
-; CHECK-NEXT:    ret void
-;
-  %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
-  ret void
-}

>From 64b6cf4095b861396cd88910b813ca3252f25a43 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Thu, 8 Aug 2024 10:57:30 -0700
Subject: [PATCH 6/6] [LoongArch] Create and use DefaultAttrsVecInt instead of
 IntrWillReturn

---
 llvm/include/llvm/IR/IntrinsicsLoongArch.td | 53 +++++++++++----------
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index e1661a860c70c..4621f1689b46e 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -140,6 +140,11 @@ class VecInt<list<LLVMType> ret_types, list<LLVMType> param_types,
     : Intrinsic<ret_types, param_types, intr_properties>,
       ClangBuiltin<!subst("int_loongarch", "__builtin", NAME)>;
 
+class DefaultAttrsVecInt<list<LLVMType> ret_types, list<LLVMType> param_types,
+             list<IntrinsicProperty> intr_properties = []>
+    : DefaultAttrsIntrinsic<ret_types, param_types, intr_properties>,
+      ClangBuiltin<!subst("int_loongarch", "__builtin", NAME)>;
+
 //===----------------------------------------------------------------------===//
 // LSX
 
@@ -620,23 +625,23 @@ foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d",
 
 // LSX load/store
 def int_loongarch_lsx_vld
-  : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldx
-  : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  : DefaultAttrsVecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty],
+           [IntrReadMem, IntrArgMemOnly]>;
 def int_loongarch_lsx_vldrepl_b
-  : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_h
-  : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_w
-  : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_d
-  : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
 def int_loongarch_lsx_vst
   : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
@@ -1146,23 +1151,23 @@ def int_loongarch_lasx_xvpickve_d_f
 
 // LASX load/store
 def int_loongarch_lasx_xvld
-  : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldx
-  : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
+  : DefaultAttrsVecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty],
+           [IntrReadMem, IntrArgMemOnly]>;
 def int_loongarch_lasx_xvldrepl_b
-  : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_h
-  : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_w
-  : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_d
-  : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+  : DefaultAttrsVecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty],
+           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
 def int_loongarch_lasx_xvst
   : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty],



More information about the llvm-commits mailing list