[llvm] Add IntrWillReturn to intrinsics (PR #101562)

Kevin McAfee via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 12:36:10 PDT 2024


https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/101562

>From ef47e3d558c23fea6597a17be19bd8a6351c9965 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Thu, 1 Aug 2024 13:59:44 -0700
Subject: [PATCH 1/2] Add IntrWillReturn to intrinsics

---
 llvm/include/llvm/IR/IntrinsicsBPF.td         |   6 +-
 llvm/include/llvm/IR/IntrinsicsLoongArch.td   |  24 ++--
 llvm/include/llvm/IR/IntrinsicsMips.td        |  26 ++--
 llvm/include/llvm/IR/IntrinsicsNVVM.td        |   2 +-
 llvm/include/llvm/IR/IntrinsicsSystemZ.td     |   6 +-
 llvm/include/llvm/IR/IntrinsicsVE.td          |   4 +-
 llvm/include/llvm/IR/IntrinsicsVEVL.gen.td    | 128 +++++++++---------
 llvm/include/llvm/IR/IntrinsicsWebAssembly.td |   2 +-
 llvm/test/CodeGen/BPF/sockex2.ll              |   2 +-
 9 files changed, 100 insertions(+), 100 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index c7ec0916f1d1f..725dadeb752d8 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -13,11 +13,11 @@
 // Specialized loads from packet
 let TargetPrefix = "bpf" in {  // All intrinsics start with "llvm.bpf."
   def int_bpf_load_byte : ClangBuiltin<"__builtin_bpf_load_byte">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
   def int_bpf_load_half : ClangBuiltin<"__builtin_bpf_load_half">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
   def int_bpf_load_word : ClangBuiltin<"__builtin_bpf_load_word">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrWillReturn, IntrReadMem]>;
   def int_bpf_pseudo : ClangBuiltin<"__builtin_bpf_pseudo">,
               Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
   def int_bpf_preserve_field_info : ClangBuiltin<"__builtin_bpf_preserve_field_info">,
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 9002076e7aece..e1661a860c70c 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -621,22 +621,22 @@ foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d",
 // LSX load/store
 def int_loongarch_lsx_vld
   : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldx
   : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty],
-           [IntrReadMem, IntrArgMemOnly]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_loongarch_lsx_vldrepl_b
   : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_h
   : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_w
   : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lsx_vldrepl_d
   : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
 def int_loongarch_lsx_vst
   : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
@@ -1147,22 +1147,22 @@ def int_loongarch_lasx_xvpickve_d_f
 // LASX load/store
 def int_loongarch_lasx_xvld
   : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldx
   : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty],
-           [IntrReadMem, IntrArgMemOnly]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_loongarch_lasx_xvldrepl_b
   : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_h
   : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_w
   : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 def int_loongarch_lasx_xvldrepl_d
   : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-           [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+           [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
 def int_loongarch_lasx_xvst
   : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty],
diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td
index 3056f37b9d877..99807412e0a99 100644
--- a/llvm/include/llvm/IR/IntrinsicsMips.td
+++ b/llvm/include/llvm/IR/IntrinsicsMips.td
@@ -236,7 +236,7 @@ def int_mips_extpdp: ClangBuiltin<"__builtin_mips_extpdp">,
 def int_mips_wrdsp: ClangBuiltin<"__builtin_mips_wrdsp">,
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<1>>]>;
 def int_mips_rddsp: ClangBuiltin<"__builtin_mips_rddsp">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg<ArgIndex<0>>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrWillReturn, IntrReadMem, ImmArg<ArgIndex<0>>]>;
 
 def int_mips_insv: ClangBuiltin<"__builtin_mips_insv">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
@@ -252,22 +252,22 @@ def int_mips_repl_ph: ClangBuiltin<"__builtin_mips_repl_ph">,
   Intrinsic<[mips_v2q15_ty], [llvm_i32_ty], [IntrNoMem]>;
 
 def int_mips_pick_qb: ClangBuiltin<"__builtin_mips_pick_qb">,
-  Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrReadMem]>;
+  Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrWillReturn, IntrReadMem]>;
 def int_mips_pick_ph: ClangBuiltin<"__builtin_mips_pick_ph">,
-  Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrReadMem]>;
+  Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrWillReturn, IntrReadMem]>;
 
 def int_mips_mthlip: ClangBuiltin<"__builtin_mips_mthlip">,
   Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], []>;
 
 def int_mips_bposge32: ClangBuiltin<"__builtin_mips_bposge32">,
-  Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+  Intrinsic<[llvm_i32_ty], [], [IntrWillReturn, IntrReadMem]>;
 
 def int_mips_lbux: ClangBuiltin<"__builtin_mips_lbux">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_lhx: ClangBuiltin<"__builtin_mips_lhx">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_lwx: ClangBuiltin<"__builtin_mips_lwx">,
-  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
 //===----------------------------------------------------------------------===//
 // MIPS DSP Rev 2
@@ -1260,23 +1260,23 @@ def int_mips_insve_d : ClangBuiltin<"__builtin_msa_insve_d">,
 
 def int_mips_ld_b : ClangBuiltin<"__builtin_msa_ld_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_h : ClangBuiltin<"__builtin_msa_ld_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_w : ClangBuiltin<"__builtin_msa_ld_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ld_d : ClangBuiltin<"__builtin_msa_ld_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
 def int_mips_ldr_d : ClangBuiltin<"__builtin_msa_ldr_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 def int_mips_ldr_w : ClangBuiltin<"__builtin_msa_ldr_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
 def int_mips_ldi_b : ClangBuiltin<"__builtin_msa_ldi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 1e7fdb53059e2..0fb8b770e4d9c 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4699,7 +4699,7 @@ def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
 class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
   : Intrinsic<Frag.regs,
               !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
-              [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+              [IntrWillReturn, IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
               WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
 
 // WMMA.STORE.D
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index 9f4b905fedc7c..6d510b14dd991 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -231,11 +231,11 @@ let TargetPrefix = "s390" in {
 
   def int_s390_vlbb : ClangBuiltin<"__builtin_s390_vlbb">,
                       Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-                                [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+                                [IntrWillReturn, IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
 
   def int_s390_vll : ClangBuiltin<"__builtin_s390_vll">,
                      Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
-                               [IntrReadMem, IntrArgMemOnly]>;
+                               [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
   def int_s390_vpdi : ClangBuiltin<"__builtin_s390_vpdi">,
                       Intrinsic<[llvm_v2i64_ty],
@@ -400,7 +400,7 @@ let TargetPrefix = "s390" in {
   // Instructions from the Vector Packed Decimal Facility
   def int_s390_vlrl : ClangBuiltin<"__builtin_s390_vlrlr">,
                       Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
-                                [IntrReadMem, IntrArgMemOnly]>;
+                                [IntrWillReturn, IntrReadMem, IntrArgMemOnly]>;
 
   def int_s390_vstrl : ClangBuiltin<"__builtin_s390_vstrlr">,
                        Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td
index 15b828b320ea2..eb3436b5fd30e 100644
--- a/llvm/include/llvm/IR/IntrinsicsVE.td
+++ b/llvm/include/llvm/IR/IntrinsicsVE.td
@@ -4,10 +4,10 @@
 let TargetPrefix = "ve" in {
   def int_ve_vl_pack_f32p : ClangBuiltin<"__builtin_ve_vl_pack_f32p">,
                             Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
-                                      [IntrReadMem]>;
+                                      [IntrWillReturn, IntrReadMem]>;
   def int_ve_vl_pack_f32a : ClangBuiltin<"__builtin_ve_vl_pack_f32a">,
                             Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
-                                      [IntrReadMem]>;
+                                      [IntrWillReturn, IntrReadMem]>;
 
   def int_ve_vl_extract_vm512u :
       ClangBuiltin<"__builtin_ve_vl_extract_vm512u">,
diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
index 554dd85572005..7dca4774e99de 100644
--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -1,35 +1,35 @@
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : ClangBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : ClangBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
@@ -1176,38 +1176,38 @@ let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : ClangBuiltin<"__builtin_ve_v
 let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : ClangBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : ClangBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : ClangBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrWillReturn, IntrReadMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : ClangBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : ClangBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 4d2df1c44ebce..d5020a9a624dc 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -348,7 +348,7 @@ def int_wasm_relaxed_dot_bf16x8_add_f32:
 def int_wasm_loadf16_f32:
   Intrinsic<[llvm_float_ty],
             [llvm_ptr_ty],
-            [IntrReadMem, IntrArgMemOnly],
+            [IntrWillReturn, IntrReadMem, IntrArgMemOnly],
              "", [SDNPMemOperand]>;
 def int_wasm_storef16_f32:
   Intrinsic<[],
diff --git a/llvm/test/CodeGen/BPF/sockex2.ll b/llvm/test/CodeGen/BPF/sockex2.ll
index 4131d9dac31d8..b1264099f64c6 100644
--- a/llvm/test/CodeGen/BPF/sockex2.ll
+++ b/llvm/test/CodeGen/BPF/sockex2.ll
@@ -311,7 +311,7 @@ flow_dissector.exit.thread:                       ; preds = %86, %12, %196, %199
 ; CHECK-LABEL: bpf_prog2:
 ; CHECK: r0 = *(u16 *)skb[12] # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
 ; CHECK: r0 = *(u16 *)skb[16] # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
-; CHECK: implicit-def: $r8
+; CHECK: implicit-def: $r7
 ; CHECK: r1 =
 ; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
 ; CHECK: call 2 # encoding: [0x85,0x00,0x00,0x00,0x02,0x00,0x00,0x00]

>From 725139a231c2acbb704fa893f1bb16d421dd0200 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Tue, 6 Aug 2024 12:35:57 -0700
Subject: [PATCH 2/2] Add DCE test for modified intrinsics

---
 .../Transforms/DCE/intrinsics-willreturn.ll   | 940 ++++++++++++++++++
 1 file changed, 940 insertions(+)
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-willreturn.ll

diff --git a/llvm/test/Transforms/DCE/intrinsics-willreturn.ll b/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
new file mode 100644
index 0000000000000..5d66216db79cd
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-willreturn.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+
+; BPF
+
+declare i64 @llvm.bpf.load.half(ptr, i64)
+declare i64 @llvm.bpf.load.word(ptr, i64)
+declare i64 @llvm.bpf.load.byte(ptr, i64)
+
+define void @test_bpf_load_half(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_half(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.half(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_word(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_word(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.word(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_byte(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_byte(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.byte(ptr %a, i64 %b)
+  ret void
+}
+
+; LoongArch
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(ptr, i32)
+declare <16 x i8> @llvm.loongarch.lsx.vldx(ptr, i64)
+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr, i32)
+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr, i32)
+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr, i32)
+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr, i32)
+
+declare <32 x i8> @llvm.loongarch.lasx.xvld(ptr, i32)
+declare <32 x i8> @llvm.loongarch.lasx.xvldx(ptr, i64)
+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr, i32)
+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr, i32)
+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr, i32)
+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr, i32)
+
+define void @test_loongarch_lsx_vld(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vld(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vld(ptr %a, i32 8)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldx(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldx(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vldx(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_b(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_b(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr %a, i32 12)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_h(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_h(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr %a, i32 12)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_w(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_w(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lsx_vldrepl_d(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lsx_vldrepl_d(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvld(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvld(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvld(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldx(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldx(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_b(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_b(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_h(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_h(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_w(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_w(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr %a, i32 4)
+  ret void
+}
+
+define void @test_loongarch_lasx_xvldrepl_d(ptr %a) {
+; CHECK-LABEL: define void @test_loongarch_lasx_xvldrepl_d(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr %a, i32 4)
+  ret void
+}
+
+; MIPS
+
+declare i32 @llvm.mips.rddsp(i32)
+declare <4 x i8> @llvm.mips.pick.qb(<4 x i8>, <4 x i8>)
+declare <2 x i16> @llvm.mips.pick.ph(<2 x i16>, <2 x i16>)
+declare i32 @llvm.mips.bposge32()
+declare i32 @llvm.mips.lbux(ptr, i32)
+declare i32 @llvm.mips.lhx(ptr, i32)
+declare i32 @llvm.mips.lwx(ptr, i32)
+declare <16 x i8> @llvm.mips.ld.b(ptr, i32)
+declare <8 x i16> @llvm.mips.ld.h(ptr, i32)
+declare <4 x i32> @llvm.mips.ld.w(ptr, i32)
+declare <2 x i64> @llvm.mips.ld.d(ptr, i32)
+declare <2 x i64> @llvm.mips.ldr.d(ptr, i32)
+declare <4 x i32> @llvm.mips.ldr.w(ptr, i32)
+
+define void @test_mips_rddsp() {
+; CHECK-LABEL: define void @test_mips_rddsp() {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.rddsp(i32 4)
+  ret void
+}
+
+define void @test_llvm_mips_pick_qb(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: define void @test_llvm_mips_pick_qb(
+; CHECK-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %a, <4 x i8> %b)
+  ret void
+}
+
+define void @test_llvm_mips_pick_ph(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: define void @test_llvm_mips_pick_ph(
+; CHECK-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %a, <2 x i16> %b)
+  ret void
+}
+
+define void @test_llvm_mips_bposge32() {
+; CHECK-LABEL: define void @test_llvm_mips_bposge32() {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.bposge32()
+  ret void
+}
+
+define void @test_llvm_mips_lbux(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lbux(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lbux(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_lhx(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lhx(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lhx(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_lwx(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_lwx(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i32 @llvm.mips.lwx(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_b(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_b(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.mips.ld.b(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_h(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_h(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <8 x i16> @llvm.mips.ld.h(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_w(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_w(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.mips.ld.w(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ld_d(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ld_d(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.mips.ld.d(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ldr_d(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ldr_d(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <2 x i64> @llvm.mips.ldr.d(ptr %a, i32 %b)
+  ret void
+}
+
+define void @test_llvm_mips_ldr_w(ptr %a, i32 %b) {
+; CHECK-LABEL: define void @test_llvm_mips_ldr_w(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <4 x i32> @llvm.mips.ldr.w(ptr %a, i32 %b)
+  ret void
+}
+
+; NVVM
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_a_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_b_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.f16.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f16_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {<2 x half>, <2 x half>, <2 x half>, <2 x half>} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.f32.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_row_stride_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(i8 addrspace(0)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p0i8(i8 addrspace(0)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(i8 addrspace(0)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p0i8(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p0i8(i8 addrspace(0)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(i8 addrspace(3)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p3i8(i8 addrspace(3)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(i8 addrspace(3)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p3i8(
+; CHECK-SAME: ptr addrspace(3) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p3i8(i8 addrspace(3)* %src , i32 %stride);
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(i8 addrspace(1)* %src ) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.f32.p1i8(i8 addrspace(1)* %src );
+  ret void;
+}
+
+
+declare {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+
+define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(i8 addrspace(1)* %src , i32 %stride) {
+; CHECK-LABEL: define void @test_llvm_nvvm_wmma_m16n16k16_load_c_col_stride_f32_p1i8(
+; CHECK-SAME: ptr addrspace(1) [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call {float, float, float, float, float, float, float, float} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1i8(i8 addrspace(1)* %src , i32 %stride);
+  ret void;
+}
+
+; SystemZ
+
+declare <16 x i8> @llvm.s390.vlbb(ptr, i32)
+declare <16 x i8> @llvm.s390.vll(i32, ptr)
+declare <16 x i8> @llvm.s390.vlrl(i32, ptr)
+
+define void @test_llvm_s390.vlbb(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_s390.vlbb(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vlbb(ptr %a, i32 8)
+  ret void
+}
+
+define void @test_llvm_s390.vll(i32 %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_s390.vll(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vll(i32 %a, ptr %b)
+  ret void
+}
+
+define void @test_llvm_s390.vlrl(i32 %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_s390.vlrl(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call <16 x i8> @llvm.s390.vlrl(i32 %a, ptr %b)
+  ret void
+}
+
+; VE
+
+declare i64 @llvm.ve.vl.pack.f32p(ptr, ptr)
+declare i64 @llvm.ve.vl.pack.f32a(ptr)
+
+define void @test_llvm_ve_vl_pack_f32p(ptr %a, ptr %b) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32p(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.ve.vl.pack.f32p(ptr %a, ptr %b)
+  ret void
+}
+
+define void @test_llvm_ve_vl_pack_f32a(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_ve_vl_pack_f32a(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.ve.vl.pack.f32a(ptr %a)
+  ret void
+}
+
+; WebAssembly
+
+declare float @llvm.wasm.loadf16.f32(ptr)
+
+define void @test_llvm_wasm_loadf16_f32(ptr %a) {
+; CHECK-LABEL: define void @test_llvm_wasm_loadf16_f32(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call float @llvm.wasm.loadf16.f32(ptr %a)
+  ret void
+}
+
+
+



More information about the llvm-commits mailing list