[clang] [llvm] [Clang][AArch64] Add customisable immediate range checking to NEON (PR #100278)

Wed Aug 28 04:17:31 PDT 2024

https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/100278

>From 6256f31bd5e64feb66309ef46155f066748b5544 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 23 Jul 2024 08:38:32 +0000
Subject: [PATCH 01/13] Rebase to resolve arm_neon.td conflict

---
 clang/include/clang/Basic/TargetBuiltins.h    |  39 +-
 clang/include/clang/Basic/arm_fp16.td         |   2 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |  39 ++
 clang/include/clang/Basic/arm_neon.td         | 374 ++++++++++++------
 clang/include/clang/Basic/arm_neon_incl.td    |  11 +-
 clang/include/clang/Basic/arm_sve_sme_incl.td |  36 +-
 clang/include/clang/Sema/SemaARM.h            |   3 +
 clang/lib/Sema/SemaARM.cpp                    | 151 ++++---
 clang/test/CodeGen/aarch64-neon-vcmla.c       |  60 ++-
 clang/test/Sema/aarch64-neon-vcmla-ranges.c   | 202 ++++++++++
 clang/utils/TableGen/NeonEmitter.cpp          | 133 +++----
 clang/utils/TableGen/SveEmitter.cpp           |   2 +-
 12 files changed, 700 insertions(+), 352 deletions(-)
 create mode 100644 clang/include/clang/Basic/arm_immcheck_incl.td
 create mode 100644 clang/test/Sema/aarch64-neon-vcmla-ranges.c

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 4333830bf34f24..50e17ad7e16284 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -209,15 +209,45 @@ namespace clang {
         Flags |= QuadFlag;
     }
 
-    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
+    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); } 
     bool isPoly() const {
       EltType ET = getEltType();
       return ET == Poly8 || ET == Poly16 || ET == Poly64;
     }
     bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
-    bool isQuad() const { return (Flags & QuadFlag) != 0; }
+    bool isQuad() const { return (Flags & QuadFlag) != 0; };
+    unsigned getEltSizeInBits() const {
+      switch(getEltType()){
+        case Int8:
+        case Poly8:
+          return 8;
+        case Int16:
+        case Float16:
+        case Poly16:
+        case BFloat16:
+          return 16;
+        case Int32:
+        case Float32:
+          return 32;
+        case Int64:
+        case Float64:
+        case Poly64:
+          return 64;
+        case Poly128:
+          return 128;
+        default:
+          llvm_unreachable("Invalid NeonTypeFlag!");
+      }
+    }
   };
 
+    // Shared between SVE/SME and NEON
+    enum ArmImmCheckType {
+#define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+#include "clang/Basic/arm_sve_typeflags.inc"
+#undef  LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+    };
+
   /// Flags to identify the types for overloaded SVE builtins.
   class SVETypeFlags {
     uint64_t Flags;
@@ -249,11 +279,6 @@ namespace clang {
 #undef LLVM_GET_SVE_MERGETYPES
     };
 
-    enum ImmCheckType {
-#define LLVM_GET_SVE_IMMCHECKTYPES
-#include "clang/Basic/arm_sve_typeflags.inc"
-#undef LLVM_GET_SVE_IMMCHECKTYPES
-    };
 
     SVETypeFlags(uint64_t F) : Flags(F) {
       EltTypeShift = llvm::countr_zero(EltTypeMask);
diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index d36b4617bef5d2..42228a3ba1ffad 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -76,7 +76,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "(1U)1", "Sh">;
   def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
-  let isVCVT_N = 1 in {
+  let isVCVT_N = 1, ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
     def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
     def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
     def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
new file mode 100644
index 00000000000000..3b20248f650400
--- /dev/null
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -0,0 +1,39 @@
+class ImmCheckType<int val> {
+  int Value = val;
+}
+
+// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
+def ImmCheck0_31                : ImmCheckType<0>;  // 0..31 (used for e.g. predicate patterns)
+def ImmCheck1_16                : ImmCheckType<1>;  // 1..16
+def ImmCheckExtract             : ImmCheckType<2>;  // 0..(2048/sizeinbits(elt) - 1)
+def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
+def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
+def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
+def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
+def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneQIndex          : ImmCheckType<8>;  // (Neon) treat type as Quad
+def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(128/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270    : ImmCheckType<11>; // [90,270]
+def ImmCheckComplexRotAll90     : ImmCheckType<12>; // [0, 90, 180, 270]
+def ImmCheck0_13                : ImmCheckType<13>; // 0..13
+def ImmCheck0_1                 : ImmCheckType<14>; // 0..1
+def ImmCheck0_2                 : ImmCheckType<15>; // 0..2
+def ImmCheck0_3                 : ImmCheckType<16>; // 0..3
+def ImmCheck0_0                 : ImmCheckType<17>; // 0..0
+def ImmCheck0_15                : ImmCheckType<18>; // 0..15
+def ImmCheck0_255               : ImmCheckType<19>; // 0..255
+def ImmCheck2_4_Mul2            : ImmCheckType<20>; // 2, 4
+def ImmCheck1_1                 : ImmCheckType<21>; // 1..1
+def ImmCheck1_3                 : ImmCheckType<22>; // 1..3
+def ImmCheck1_7                 : ImmCheckType<23>; // 1..7
+def ImmCheck1_32                : ImmCheckType<24>; // 1..32
+def ImmCheck1_64                : ImmCheckType<25>; // 1..64
+def ImmCheck0_63                : ImmCheckType<26>; // 0..63
+
+class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
+  int Arg = arg;
+  // The index of the argument whose type should be referred to when validating this immediate.
+  int EltSizeArg = eltSizeArg;
+  ImmCheckType Kind = kind;
+}
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 3098fa67e6a512..ee823f6ef68139 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -284,14 +284,18 @@ def OP_CVT_F32_BF16
 
 // Splat operation - performs a range-checked splat over a vector
 def SPLAT  : WInst<"splat_lane", ".(!q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl">;
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                    [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                   [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
   let isLaneQ = 1;
 }
 let TargetGuard = "bf16,neon" in {
-  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb">;
-  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb"> {
+  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb", 
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", 
+                      [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
     let isLaneQ = 1;
   }
 }
@@ -401,27 +405,45 @@ def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.12 Shifts by constant
 let isShift = 1 in {
-def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">;
-def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl">;
-def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil">;
-def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil">;
-def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl">;
-def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl">;
-def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl">;
-def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">;
+
+
+def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<2, ImmCheckShiftRight>]>;
+def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<2, ImmCheckShiftRight>]>;
+def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.13 Shifts with insert
 def VSRI_N : WInst<"vsri_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                    [ImmCheck<2, ImmCheckShiftRight>]>;
 def VSLI_N : WInst<"vsli_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                   [ImmCheck<2, ImmCheckShiftLeft>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -435,7 +457,8 @@ def VLD1_X3   : WInst<"vld1_x3", "3(c*!)",
 def VLD1_X4   : WInst<"vld1_x4", "4(c*!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP  : WInst<"vld1_dup", ".(c*!)",
                       "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST1      : WInst<"vst1", "v*(.!)",
@@ -447,19 +470,23 @@ def VST1_X3   : WInst<"vst1_x3", "v*(3!)",
 def VST1_X4   : WInst<"vst1_x4", "v*(4!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VST1_LANE : WInst<"vst1_lane", "v*(.!)I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+
 let ArchGuard = "(__ARM_FP & 2)" in {
 def VLD1_F16      : WInst<"vld1", ".(c*!)", "hQh">;
 def VLD1_X2_F16   : WInst<"vld1_x2", "2(c*!)", "hQh">;
 def VLD1_X3_F16   : WInst<"vld1_x3", "3(c*!)", "hQh">;
 def VLD1_X4_F16   : WInst<"vld1_x4", "4(c*!)", "hQh">;
-def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">;
+def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh", 
+                          [ImmCheck<2, ImmCheck0_3, 1>]>;
 def VLD1_DUP_F16  : WInst<"vld1_dup", ".(c*!)", "hQh">;
 def VST1_F16      : WInst<"vst1", "v*(.!)", "hQh">;
 def VST1_X2_F16   : WInst<"vst1_x2", "v*(2!)", "hQh">;
 def VST1_X3_F16   : WInst<"vst1_x3", "v*(3!)", "hQh">;
 def VST1_X4_F16   : WInst<"vst1_x4", "v*(4!)", "hQh">;
-def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">;
+def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh", 
+                          [ImmCheck<2, ImmCheck0_3, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -473,15 +500,21 @@ def VLD3_DUP  : WInst<"vld3_dup", "3(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
 def VLD4_DUP  : WInst<"vld4_dup", "4(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
-def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
-def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 let ArchGuard = "(__ARM_FP & 2)" in {
 def VLD2_F16      : WInst<"vld2", "2(c*!)", "hQh">;
 def VLD3_F16      : WInst<"vld3", "3(c*!)", "hQh">;
@@ -489,28 +522,36 @@ def VLD4_F16      : WInst<"vld4", "4(c*!)", "hQh">;
 def VLD2_DUP_F16  : WInst<"vld2_dup", "2(c*!)", "hQh">;
 def VLD3_DUP_F16  : WInst<"vld3_dup", "3(c*!)", "hQh">;
 def VLD4_DUP_F16  : WInst<"vld4_dup", "4(c*!)", "hQh">;
-def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">;
-def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">;
-def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">;
+def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh", 
+                          [ImmCheck<4, ImmCheck0_3, 1>]>;
+def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh", 
+                          [ImmCheck<5, ImmCheck0_3, 1>]>;
+def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh", 
+                          [ImmCheck<6, ImmCheck0_3, 1>]>;
 def VST2_F16      : WInst<"vst2", "v*(2!)", "hQh">;
 def VST3_F16      : WInst<"vst3", "v*(3!)", "hQh">;
 def VST4_F16      : WInst<"vst4", "v*(4!)", "hQh">;
-def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">;
-def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">;
-def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">;
+def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh", 
+                          [ImmCheck<3, ImmCheck0_3, 1>]>;
+def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh", 
+                         [ImmCheck<4, ImmCheck0_3, 1>]>;
+def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh", 
+                          [ImmCheck<5, ImmCheck0_3, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.16 Extract lanes from a vector
 let InstName = "vmov" in
 def VGET_LANE : IInst<"vget_lane", "1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.17 Set lanes within a vector
 let InstName = "vmov" in
 def VSET_LANE : IInst<"vset_lane", ".1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.18 Initialize a vector from bit pattern
@@ -560,9 +601,12 @@ def VCVT_S32     : SInst<"vcvt_s32", "S.",  "fQf">;
 def VCVT_U32     : SInst<"vcvt_u32", "U.",  "fQf">;
 def VCVT_F32     : SInst<"vcvt_f32", "F(.!)",  "iUiQiQUi">;
 let isVCVT_N = 1 in {
-def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf">;
-def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf">;
-def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">;
+def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf", 
+                        [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf", 
+                        [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi", 
+                        [ImmCheck<1, ImmCheck1_32>]>;
 }
 
 def VMOVN        : IInst<"vmovn", "<Q",  "silUsUiUl">;
@@ -610,8 +654,10 @@ def VQDMULH_LANE  : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
-def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
+def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi", 
+                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi", 
+                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 }
 
 let TargetGuard = "v8.1a,neon" in {
@@ -629,7 +675,8 @@ def VQDMLSL_N     : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.26 Vector Extract
 def VEXT : WInst<"vext", "...I",
-                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">;
+                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf", 
+                 [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.27 Reverse vector elements
@@ -738,14 +785,22 @@ def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
 def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
 def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
 
-def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">;
-def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">;
-def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl", 
+                    [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl", 
+                    [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
 def LD1_DUP  : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">;
 def LD2_DUP  : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">;
@@ -901,8 +956,8 @@ def SHLL_HIGH_N    : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
                              OP_LONG_HI>;
 
 ////////////////////////////////////////////////////////////////////////////////
-def SRI_N : WInst<"vsri_n", "...I", "PlQPl">;
-def SLI_N : WInst<"vsli_n", "...I", "PlQPl">;
+def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 1>]>;
+def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 1>]>;
 
 // Right shift narrow high
 def SHRN_HIGH_N    : IOpInst<"vshrn_high_n", "<(<q).I",
@@ -924,9 +979,12 @@ def QRSHRN_HIGH_N  : SOpInst<"vqrshrn_high_n", "<(<q).I",
 def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
 let isVCVT_N = 1 in {
-def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">;
+def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl", 
+                        [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd", 
+                        [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd", 
+                        [ImmCheck<1, ImmCheck1_64>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -965,8 +1023,10 @@ let TargetGuard = "aes,neon" in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
-def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">;
-def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">;
+def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl", 
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
                         "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
 def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
@@ -1011,8 +1071,10 @@ def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
   let isLaneQ = 1;
 }
 
-def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd">;
-def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd"> {
+def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd", 
+                        [ImmCheck<3, ImmCheckLaneIndex, 0>]>;
+def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd", 
+                        [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
@@ -1088,8 +1150,10 @@ def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
 }
 
 let isLaneQ = 1 in {
-def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
-def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
+def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
+                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
+def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
+                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
 def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
@@ -1118,7 +1182,8 @@ def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Newly added Vector Extract for f64
-def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">;
+def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl", 
+                    [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Crypto
@@ -1149,7 +1214,7 @@ def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def RAX1 : SInst<"vrax1", "...", "QUl">;
 
 let isVXAR = 1 in {
-def XAR :  SInst<"vxar", "...I", "QUl">;
+def XAR :  SInst<"vxar", "...I", "QUl", [ImmCheck<2, ImmCheck0_63>]>;
 }
 }
 
@@ -1162,10 +1227,10 @@ def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4,neon" in {
 def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
-def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
-def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
-def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi">;
-def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi">;
+def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
 def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
 def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
 }
@@ -1327,49 +1392,68 @@ def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
 // Scalar Shift (Immediate)
 let isScalarShift = 1 in {
 // Signed/Unsigned Shift Right (Immediate)
-def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">;
+def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl", 
+                        [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right (Immediate)
-def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">;
+def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl", 
+                          [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 
 // Signed/Unsigned Shift Right and Accumulate (Immediate)
-def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">;
+def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
-def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">;
+def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 
 // Shift Left (Immediate)
-def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">;
+def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed/Unsigned Saturating Shift Left (Immediate)
-def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed Saturating Shift Left Unsigned (Immediate)
-def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">;
+def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl", 
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 
 // Shift Right And Insert (Immediate)
-def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">;
+def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Shift Left And Insert (Immediate)
-def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">;
+def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 let isScalarNarrowShift = 1 in {
   // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
-  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
   // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
-  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
   // Signed Saturating Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl">;
+  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
   // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl">;
+  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate)
-def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">;
-def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">;
+def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi", 
+                              [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl", 
+                              [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate)
-def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">;
-def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">;
-def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">;
-def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">;
+def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf", 
+                                [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf", 
+                                [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd", 
+                                [ImmCheck<1, ImmCheck1_64>]>;
+def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd", 
+                                [ImmCheck<1, ImmCheck1_64>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1575,10 +1659,12 @@ def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_L
 def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
-def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">;
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d", 
+                            [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
-def SCALAR_VMUL_LANEQ   : IInst<"vmul_laneq", "..QI", "d"> {
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
+                              [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
   let isLaneQ = 1;
 }
 
@@ -1591,8 +1677,10 @@ def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ
 }
 
 // Scalar Floating Point fused multiply-add (scalar, by element)
-def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">;
-def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd"> {
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
+                            [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1609,14 +1697,18 @@ def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR
 }
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
-def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi"> {
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi", 
+                                [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
+                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
-def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi"> {
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
+                              [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
+                              [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1646,8 +1738,10 @@ def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR
 }
 } // TargetGuard = "v8.1a"
 
-def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
-def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
+def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+                            [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
   let isLaneQ = 1;
 }
 
@@ -1720,9 +1814,12 @@ let TargetGuard = "fullfp16,neon" in {
 
   // Vector conversion
   let isVCVT_N = 1 in {
-    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">;
-    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">;
-    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">;
+    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs", 
+                          [ImmCheck<1, ImmCheck1_16>]>;
+    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh", 
+                          [ImmCheck<1, ImmCheck1_16>]>;
+    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh", 
+                          [ImmCheck<1, ImmCheck1_16>]>;
   }
 
   // Max/Min
@@ -1770,7 +1867,7 @@ def VZIPH    : WInst<"vzip", "2..", "hQh">;
 def VUZPH    : WInst<"vuzp", "2..", "hQh">;
 def VTRNH    : WInst<"vtrn", "2..", "hQh">;
 // Vector Extract
-def VEXTH      : WInst<"vext", "...I", "hQh">;
+def VEXTH      : WInst<"vext", "...I", "hQh", [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 // Reverse vector elements
 def VREV64H    : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
 
@@ -1801,16 +1898,20 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   // ARMv8.2-A FP16 lane vector intrinsics.
 
   // FMA lane
-  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh">;
-  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh"> {
+  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
+                          [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
     let isLaneQ = 1;
   }
 
   // FMA lane with scalar argument
   def FMLA_NH      : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
   // Scalar floating point fused multiply-add (scalar, by element)
-  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh">;
-  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh"> {
+  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
+                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
     let isLaneQ = 1;
   }
 
@@ -1844,8 +1945,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   }
   def VMULX_NH      : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
   // Scalar floating point  mulx (scalar, by element)
-  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">;
-  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh"> {
+  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
+                                [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
     let isLaneQ = 1;
   }
 
@@ -1865,8 +1968,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def VZIP2H     : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>;
   def VUZP2H     : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>;
 
-  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh">;
-  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh"> {
+  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh", 
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh", 
+                                [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
     let isLaneQ = 1;
   }
 }
@@ -1959,9 +2064,12 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
 
     let isLaneQ = 1 in  {
       // vcmla{ROT}_laneq
+      // ACLE specifies that the fp16 vcmla_#ROT_laneq variant has an immediate range of 0 <= lane <= 1.
+      // fp16 is the only variant for which these two differ.
+      // https://developer.arm.com/documentation/ihi0073/latest/ 
+      defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
       def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
-              (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-
+                (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>;
       // vcmlaq{ROT}_laneq
       def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
              (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
@@ -2011,10 +2119,14 @@ let TargetGuard = "bf16,neon" in {
   def VGET_HIGH_BF : NoTestOpInst<"vget_high", ".Q", "b", OP_HI>;
   def VGET_LOW_BF  : NoTestOpInst<"vget_low", ".Q", "b", OP_LO>;
 
-  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb">;
-  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb">;
-  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb">;
-  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb"> {
+  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb", 
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb", 
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
+                          [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
     let isLaneQ = 1;
   }
 
@@ -2036,14 +2148,22 @@ let TargetGuard = "bf16,neon" in {
   def VST1_X3_BF : WInst<"vst1_x3", "v*(3!)", "bQb">;
   def VST1_X4_BF : WInst<"vst1_x4", "v*(4!)", "bQb">;
 
-  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb">;
-  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb">;
-  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb">;
-  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb">;
-  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb">;
-  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb">;
-  def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb">;
-  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb">;
+  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb", 
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb", 
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb", 
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb", 
+                          [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb", 
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb", 
+                          [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+  def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb",
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb", 
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
   def VLD1_DUP_BF : WInst<"vld1_dup", ".(c*!)", "bQb">;
   def VLD2_DUP_BF : WInst<"vld2_dup", "2(c*!)", "bQb">;
@@ -2093,6 +2213,8 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
 
 // v8.9a/v9.4a LRCPC3 intrinsics
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3,neon" in {
-  def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
-  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
+  def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl",
+                        [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl", 
+                        [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index 3b8015daee6d90..2b5acd41e7bbd4 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -21,6 +21,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+include "arm_immcheck_incl.td"
+
 // The base Operation class. All operations must subclass this.
 class Operation<list<dag> ops=[]> {
   list<dag> Ops = ops;
@@ -260,7 +262,7 @@ def OP_UNAVAILABLE : Operation {
 
 
 // Every intrinsic subclasses Inst.
-class Inst <string n, string p, string t, Operation o> {
+class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
   string Name = n;
   string Prototype = p;
   string Types = t;
@@ -278,6 +280,7 @@ class Inst <string n, string p, string t, Operation o> {
   // a Q register. Only used for intrinsics which end up calling polymorphic
   // builtins.
   bit isLaneQ = 0;
+  list<ImmCheck> ImmChecks = ch;
 
   // Certain intrinsics have different names than their representative
   // instructions. This field allows us to handle this correctly when we
@@ -300,9 +303,9 @@ class Inst <string n, string p, string t, Operation o> {
 // SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
 // IInst: Instruction with generic integer suffix (e.g., "i8")
 // WInst: Instruction with only bit size suffix (e.g., "8")
-class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
+class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
 
 // The following instruction classes are implemented via operators
 // instead of builtins. As such these declarations are only used for
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 6ec357825a132a..fdf4ba55fe9382 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -13,6 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+include "arm_immcheck_incl.td"
+
 //===----------------------------------------------------------------------===//
 // Instruction definitions
 //===----------------------------------------------------------------------===//
@@ -233,40 +235,6 @@ def IsInZT0                         : FlagType<0x400000000000>;
 def IsOutZT0                        : FlagType<0x800000000000>;
 def IsInOutZT0                      : FlagType<0x1000000000000>;
 
-// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
-class ImmCheckType<int val> {
-  int Value = val;
-}
-def ImmCheck0_31                : ImmCheckType<0>;  // 0..31 (used for e.g. predicate patterns)
-def ImmCheck1_16                : ImmCheckType<1>;  // 1..16
-def ImmCheckExtract             : ImmCheckType<2>;  // 0..(2048/sizeinbits(elt) - 1)
-def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
-def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
-def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
-def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
-def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<8>;  // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot        : ImmCheckType<9>;  // 0..(128/(4*sizeinbits(elt)) - 1)
-def ImmCheckComplexRot90_270    : ImmCheckType<10>; // [90,270]
-def ImmCheckComplexRotAll90     : ImmCheckType<11>; // [0, 90, 180,270]
-def ImmCheck0_13                : ImmCheckType<12>; // 0..13
-def ImmCheck0_1                 : ImmCheckType<13>; // 0..1
-def ImmCheck0_2                 : ImmCheckType<14>; // 0..2
-def ImmCheck0_3                 : ImmCheckType<15>; // 0..3
-def ImmCheck0_0                 : ImmCheckType<16>; // 0..0
-def ImmCheck0_15                : ImmCheckType<17>; // 0..15
-def ImmCheck0_255               : ImmCheckType<18>; // 0..255
-def ImmCheck2_4_Mul2            : ImmCheckType<19>; // 2, 4
-def ImmCheck1_1                 : ImmCheckType<20>; // 1..1
-def ImmCheck1_3                 : ImmCheckType<21>; // 1..3
-def ImmCheck1_7                 : ImmCheckType<22>; // 1..7
-
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
-  int Arg = arg;
-  int EltSizeArg = eltSizeArg;
-  ImmCheckType Kind = kind;
-}
-
 defvar InvalidMode = "";
 
 class Inst<string n, string p, string t, MergeType mt, string i,
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index fedc7df7908f17..1ced84300c1794 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -41,6 +41,9 @@ class SemaARM : public SemaBase {
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
+  bool ParseNeonImmChecks(CallExpr *TheCall, 
+                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks, 
+                          int OverloadType);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index d8dd4fe16e3af0..8f4d94e1df6788 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -314,40 +314,6 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
   return false;
 }
 
-// Get the valid immediate range for the specified NEON type code.
-static unsigned RFT(unsigned t, bool shift = false, bool ForceQuad = false) {
-  NeonTypeFlags Type(t);
-  int IsQuad = ForceQuad ? true : Type.isQuad();
-  switch (Type.getEltType()) {
-  case NeonTypeFlags::Int8:
-  case NeonTypeFlags::Poly8:
-    return shift ? 7 : (8 << IsQuad) - 1;
-  case NeonTypeFlags::Int16:
-  case NeonTypeFlags::Poly16:
-    return shift ? 15 : (4 << IsQuad) - 1;
-  case NeonTypeFlags::Int32:
-    return shift ? 31 : (2 << IsQuad) - 1;
-  case NeonTypeFlags::Int64:
-  case NeonTypeFlags::Poly64:
-    return shift ? 63 : (1 << IsQuad) - 1;
-  case NeonTypeFlags::Poly128:
-    return shift ? 127 : (1 << IsQuad) - 1;
-  case NeonTypeFlags::Float16:
-    assert(!shift && "cannot shift float types!");
-    return (4 << IsQuad) - 1;
-  case NeonTypeFlags::Float32:
-    assert(!shift && "cannot shift float types!");
-    return (2 << IsQuad) - 1;
-  case NeonTypeFlags::Float64:
-    assert(!shift && "cannot shift float types!");
-    return (1 << IsQuad) - 1;
-  case NeonTypeFlags::BFloat16:
-    assert(!shift && "cannot shift float types!");
-    return (4 << IsQuad) - 1;
-  }
-  llvm_unreachable("Invalid NeonTypeFlag!");
-}
-
 /// getNeonEltType - Return the QualType corresponding to the elements of
 /// the vector type specified by the NeonTypeFlags.  This is used to check
 /// the pointer arguments for Neon load/store intrinsics.
@@ -403,6 +369,62 @@ enum ArmSMEState : unsigned {
   ArmZT0Mask = 0b11 << 2
 };
 
+bool SemaARM::ParseNeonImmChecks(CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> 
+                                &ImmChecks, int OverloadType = -1) {
+  int ArgIdx, CheckTy, ElementType;
+  bool hasError = false;
+
+  for (auto &I : ImmChecks) {
+    std::tie(ArgIdx, CheckTy, ElementType) = I;
+
+    NeonTypeFlags Type = (OverloadType != -1) ? 
+                          NeonTypeFlags(OverloadType) : NeonTypeFlags(ElementType); 
+          
+    switch((ArmImmCheckType)CheckTy) {
+      case ArmImmCheckType::ImmCheck0_3:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
+        break;
+      case ArmImmCheckType::ImmCheck0_63:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
+        break;
+      case ArmImmCheckType::ImmCheck0_7:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
+        break;
+      case ArmImmCheckType::ImmCheck1_16:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
+        break;
+      case ArmImmCheckType::ImmCheck1_32:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
+        break;
+      case ArmImmCheckType::ImmCheck1_64:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
+        break;
+      case ArmImmCheckType::ImmCheckLaneIndex:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,  (64 << Type.isQuad()) / 
+                                                    Type.getEltSizeInBits() - 1);
+        break; 
+      case ArmImmCheckType::ImmCheckLaneQIndex:    // force to use quad
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 
+                                                    (128/Type.getEltSizeInBits()) - 1);
+        break;
+      case ArmImmCheckType::ImmCheckShiftLeft:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 
+                                                    Type.getEltSizeInBits() - 1);
+        break;
+      case ArmImmCheckType::ImmCheckShiftRight:
+        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 
+                                                    1, Type.getEltSizeInBits());
+        break;
+      default:
+        llvm_unreachable("Invalid Neon immediate range typeflag!");
+        break;
+    }
+  }
+
+  return hasError;
+}
+
+
 bool SemaARM::ParseSVEImmChecks(
     CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
   // Perform all the immediate checks for this builtin call.
@@ -432,76 +454,76 @@ bool SemaARM::ParseSVEImmChecks(
       return false;
     };
 
-    switch ((SVETypeFlags::ImmCheckType)CheckTy) {
-    case SVETypeFlags::ImmCheck0_31:
+    switch ((ArmImmCheckType)CheckTy) {
+    case ArmImmCheckType::ImmCheck0_31:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_13:
+    case ArmImmCheckType::ImmCheck0_13:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_16:
+    case ArmImmCheckType::ImmCheck1_16:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_7:
+    case ArmImmCheckType::ImmCheck0_7:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_1:
+    case ArmImmCheckType::ImmCheck1_1:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_3:
+    case ArmImmCheckType::ImmCheck1_3:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_7:
+    case ArmImmCheckType::ImmCheck1_7:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckExtract:
+    case ArmImmCheckType::ImmCheckExtract:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (2048 / ElementSizeInBits) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckShiftRight:
+    case ArmImmCheckType::ImmCheckShiftRight:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
                                           ElementSizeInBits))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckShiftRightNarrow:
+    case ArmImmCheckType::ImmCheckShiftRightNarrow:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
                                           ElementSizeInBits / 2))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckShiftLeft:
+    case ArmImmCheckType::ImmCheckShiftLeft:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           ElementSizeInBits - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckLaneIndex:
+    case ArmImmCheckType::ImmCheckLaneIndex:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (128 / (1 * ElementSizeInBits)) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+    case ArmImmCheckType::ImmCheckLaneIndexCompRotate:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (128 / (2 * ElementSizeInBits)) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckLaneIndexDot:
+    case ArmImmCheckType::ImmCheckLaneIndexDot:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (128 / (4 * ElementSizeInBits)) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckComplexRot90_270:
+    case ArmImmCheckType::ImmCheckComplexRot90_270:
       if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
                               diag::err_rotation_argument_to_cadd))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckComplexRotAll90:
+    case ArmImmCheckType::ImmCheckComplexRotAll90:
       if (CheckImmediateInSet(
               [](int64_t V) {
                 return V == 0 || V == 90 || V == 180 || V == 270;
@@ -509,35 +531,38 @@ bool SemaARM::ParseSVEImmChecks(
               diag::err_rotation_argument_to_cmla))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_1:
+    case ArmImmCheckType::ImmCheck0_1:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_2:
+    case ArmImmCheckType::ImmCheck0_2:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_3:
+    case ArmImmCheckType::ImmCheck0_3:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_0:
+    case ArmImmCheckType::ImmCheck0_0:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_15:
+    case ArmImmCheckType::ImmCheck0_15:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_255:
+    case ArmImmCheckType::ImmCheck0_255:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck2_4_Mul2:
+    case ArmImmCheckType::ImmCheck2_4_Mul2:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
           SemaRef.BuiltinConstantArgMultiple(TheCall, ArgNum, 2))
         HasError = true;
       break;
+    default:
+      llvm_unreachable("Invalid SVE immediate range typeflag!");
+      break;
     }
   }
 
@@ -748,7 +773,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   llvm::APSInt Result;
   uint64_t mask = 0;
-  unsigned TV = 0;
+  int TV = -1;
   int PtrArgNum = -1;
   bool HasConstPtr = false;
   switch (BuiltinID) {
@@ -800,7 +825,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   // For NEON intrinsics which take an immediate value as part of the
   // instruction, range check them here.
-  unsigned i = 0, l = 0, u = 0;
+  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
   switch (BuiltinID) {
   default:
     return false;
@@ -808,9 +833,9 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 #include "clang/Basic/arm_fp16.inc"
 #include "clang/Basic/arm_neon.inc"
 #undef GET_NEON_IMMEDIATE_CHECK
-  }
-
-  return SemaRef.BuiltinConstantArgRange(TheCall, i, l, u + l);
+    }
+    
+  return ParseNeonImmChecks(TheCall, ImmChecks, TV);
 }
 
 bool SemaARM::CheckMVEBuiltinFunctionCall(unsigned BuiltinID,
diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index 02171527cc6a32..2ff48fd97b4271 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -155,15 +155,14 @@ float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rh
   return vcmla_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK:  %vcmla_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> 
 float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_laneq_f16(acc, lhs, rhs, 1);
 }
 
 // CHECK-LABEL: @test_vcmlaq_lane_f16(
@@ -191,7 +190,6 @@ float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rh
   return vcmla_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -229,15 +227,14 @@ float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x
   return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
-// CHECK-LABEL: @test_vcmla_rot90_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: %vcmla_rot90_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK: ret <4 x half> %vcmla_rot90_f163.i
 float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
 }
 
 // CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
@@ -265,7 +262,6 @@ float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x
   return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -303,15 +299,15 @@ float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16
   return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK:  %vcmla_rot180_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK:  ret <4 x half> %vcmla_rot180_f163.i
 float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
 }
 
 // CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
@@ -339,7 +335,6 @@ float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32
   return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -377,15 +372,15 @@ float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16
   return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK:  %vcmla_rot270_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK:  ret <4 x half> %vcmla_rot270_f163.i
 float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
 }
 
 // CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
@@ -413,7 +408,6 @@ float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32
   return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
diff --git a/clang/test/Sema/aarch64-neon-vcmla-ranges.c b/clang/test/Sema/aarch64-neon-vcmla-ranges.c
new file mode 100644
index 00000000000000..9b42e68670da08
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-vcmla-ranges.c
@@ -0,0 +1,202 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify %s
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+
+void test_vcmla_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_lane_f16(a, b, c, 0);
+  vcmla_lane_f16(a, b, c, 1);
+
+  vcmla_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_laneq_f16(a, b, c, 0);
+  vcmla_laneq_f16(a, b, c, 1);
+
+  vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmla_laneq_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+}
+
+void test_vcmlaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c){
+  vcmlaq_lane_f16(a, b, c, 0);
+  vcmlaq_lane_f16(a, b, c, 1);
+
+  vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_lane_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+}
+
+void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_laneq_f16(a, b, c, 0);
+  vcmlaq_laneq_f16(a, b, c, 1);
+  vcmlaq_laneq_f16(a, b, c, 3);
+
+  vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+}
+
+void test_vcmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_lane_f32(a, b, c, 0);
+
+  vcmla_lane_f32(a, b, c, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_laneq_f32(a, b, c, 0);
+
+  vcmla_laneq_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_laneq_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_laneq_f32(a, b, c, 0);
+  vcmlaq_laneq_f32(a, b, c, 1);
+
+  vcmlaq_laneq_f32(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_laneq_f32(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot90_lane_f16(a, b, c, 0);
+  vcmla_rot90_lane_f16(a, b, c, 1);
+
+  vcmla_rot90_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot90_laneq_f16(a, b, c, 0);
+  vcmla_rot90_laneq_f16(a, b, c, 1);
+
+  vcmla_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot90_laneq_f16(a, b, c, 0);
+  vcmlaq_rot90_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot90_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot180_lane_f16(a, b, c, 0);
+  vcmla_rot180_lane_f16(a, b, c, 1);
+
+  vcmla_rot180_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot180_laneq_f16(a, b, c, 0);
+  vcmla_rot180_laneq_f16(a, b, c, 1);
+
+  vcmla_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot180_laneq_f16(a, b, c, 0);
+  vcmlaq_rot180_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot180_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot270_lane_f16(a, b, c, 0);
+  vcmla_rot270_lane_f16(a, b, c, 1);
+
+  vcmla_rot270_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot270_laneq_f16(a, b, c, 0);
+  vcmla_rot270_laneq_f16(a, b, c, 1);
+
+  vcmla_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot270_laneq_f16(a, b, c, 0);
+  vcmlaq_rot270_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot270_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot90_lane_f32(a, b, c, 0);
+
+  vcmla_rot90_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot90_laneq_f32(a, b, c, 0);
+  vcmla_rot90_laneq_f32(a, b, c, 1);
+
+  vcmla_rot90_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot90_laneq_f32(a, b, c, 0);
+  vcmlaq_rot90_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot90_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot90_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot180_lane_f32(a, b, c, 0);
+
+  vcmla_rot180_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot180_laneq_f32(a, b, c, 0);
+  vcmla_rot180_laneq_f32(a, b, c, 1);
+
+  vcmla_rot180_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot180_laneq_f32(a, b, c, 0);
+  vcmlaq_rot180_laneq_f32(a, b, c, 1);
+  // Lane values outside [0, 1] must be diagnosed for the rot180 variant too.
+  vcmlaq_rot180_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot180_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot270_lane_f32(a, b, c, 0);
+
+  vcmla_rot270_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot270_laneq_f32(a, b, c, 0);
+  vcmla_rot270_laneq_f32(a, b, c, 1);
+
+  vcmla_rot270_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot270_laneq_f32(a, b, c, 0);
+  vcmlaq_rot270_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot270_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot270_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
\ No newline at end of file
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 30fbb8c5d65e5f..7666b53000edc6 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -333,6 +333,8 @@ class Intrinsic {
 
   /// The types of return value [0] and parameters [1..].
   std::vector<Type> Types;
+
+  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
   /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
   int PolymorphicKeyType;
   /// The local variables defined.
@@ -368,9 +370,9 @@ class Intrinsic {
 
 public:
   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
-            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
             StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
-      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
+      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks), CK(CK), Body(Body),
         ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
         BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
         UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
@@ -414,23 +416,22 @@ class Intrinsic {
   /// Get the architectural guard string (#ifdef).
   std::string getArchGuard() const { return ArchGuard; }
   std::string getTargetGuard() const { return TargetGuard; }
+  ArrayRef<std::tuple<int, int, int>> getImmChecks() const {return ImmChecks; }
   /// Get the non-mangled name.
   std::string getName() const { return Name; }
 
   /// Return true if the intrinsic takes an immediate operand.
   bool hasImmediate() const {
     return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
+    // NOTE(review): `!ImmChecks.empty()` was left here as a disabled alternative - confirm which is intended.
   }
 
-  /// Return the parameter index of the immediate operand.
-  unsigned getImmediateIdx() const {
-    for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
-      if (Types[Idx].isImmediate())
-        return Idx - 1;
-    llvm_unreachable("Intrinsic has no immediate");
+  /// Return true if parameter \p idx is an immediate; Types[0] is the return type, hence the +1.
+  bool isArgImmediate(unsigned idx) const {
+    assert((idx + 1) < Types.size() && "Argument type index out of range!");
+    return Types[idx + 1].isImmediate();
+  }
 
-
   unsigned getNumParams() const { return Types.size() - 1; }
   Type getReturnType() const { return Types[0]; }
   Type getParamType(unsigned I) const { return Types[I + 1]; }
@@ -554,9 +555,9 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
+  void genNeonImmCheckTypes(raw_ostream &OS);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
-
 public:
   /// Called by Intrinsic - this attempts to get an intrinsic that takes
   /// the given types as arguments.
@@ -1031,7 +1032,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
     if (LocalCK == ClassI && T.isInteger())
       T.makeSigned();
 
-    if (hasImmediate() && getImmediateIdx() == I)
+    if(isArgImmediate(I))
       T.makeImmediate(32);
 
     S += T.builtin_str();
@@ -1952,6 +1953,16 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
+  std::vector<Record*> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+  // Flatten each ImmCheck record into an (Arg, Kind.Value, EltSizeArg) tuple.
+  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
+  for (const Record *ImmCheck : ImmCheckList) {
+    // Named ImmCheck so it does not shadow the intrinsic record R used below.
+    ImmChecks.push_back(std::make_tuple(ImmCheck->getValueAsInt("Arg"),
+                        ImmCheck->getValueAsDef("Kind")->getValueAsInt("Value"),
+                        ImmCheck->getValueAsInt("EltSizeArg")));
+  }
+
   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
 
@@ -1992,7 +2003,7 @@ void NeonEmitter::createIntrinsic(Record *R,
   auto &Entry = IntrinsicMap[Name];
 
   for (auto &I : NewTypeSpecs) {
-    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
+    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body, *this,
                        ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
     Out.push_back(&Entry.back());
   }
@@ -2142,84 +2153,40 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
-void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
-                                        SmallVectorImpl<Intrinsic *> &Defs) {
-  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
+  OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
+  // Emit one "  <name> = <Value>," line per record derived from ImmCheckType.
+  for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
+    OS << "  " << RV->getNameInitAsString() << " = " << RV->getValueAsInt("Value") << ",\n";
+  }
 
+  OS << "#endif\n\n";
+}
+
+void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
+  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+  int EltType;
+  // Ensure these are only emitted once.
   std::set<std::string> Emitted;
 
-  for (auto *Def : Defs) {
-    if (Def->hasBody())
-      continue;
-    // Functions which do not have an immediate do not need to have range
-    // checking code emitted.
-    if (!Def->hasImmediate())
-      continue;
-    if (Emitted.find(Def->getMangledName()) != Emitted.end())
+  for (auto &Def : Defs) {
+    if (Emitted.find(Def->getMangledName()) != Emitted.end() || !Def->hasImmediate())
       continue;
 
-    std::string LowerBound, UpperBound;
-
-    Record *R = Def->getRecord();
-    if (R->getValueAsBit("isVXAR")) {
-      //VXAR takes an immediate in the range [0, 63]
-      LowerBound = "0";
-      UpperBound = "63";
-    } else if (R->getValueAsBit("isVCVT_N")) {
-      // VCVT between floating- and fixed-point values takes an immediate
-      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
-      LowerBound = "1";
-	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
-		  Def->getName().find('h') != std::string::npos)
-		// VCVTh operating on FP16 intrinsics in range [1, 16)
-		UpperBound = "15";
-	  else if (Def->getBaseType().getElementSizeInBits() == 32)
-        UpperBound = "31";
-	  else
-        UpperBound = "63";
-    } else if (R->getValueAsBit("isScalarShift")) {
-      // Right shifts have an 'r' in the name, left shifts do not. Convert
-      // instructions have the same bounds and right shifts.
-      if (Def->getName().find('r') != std::string::npos ||
-          Def->getName().find("cvt") != std::string::npos)
-        LowerBound = "1";
-
-      UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
-    } else if (R->getValueAsBit("isShift")) {
-      // Builtins which are overloaded by type will need to have their upper
-      // bound computed at Sema time based on the type constant.
-
-      // Right shifts have an 'r' in the name, left shifts do not.
-      if (Def->getName().find('r') != std::string::npos)
-        LowerBound = "1";
-      UpperBound = "RFT(TV, true)";
-    } else if (Def->getClassKind(true) == ClassB) {
-      // ClassB intrinsics have a type (and hence lane number) that is only
-      // known at runtime.
-      if (R->getValueAsBit("isLaneQ"))
-        UpperBound = "RFT(TV, false, true)";
-      else
-        UpperBound = "RFT(TV, false, false)";
-    } else {
-      // The immediate generally refers to a lane in the preceding argument.
-      assert(Def->getImmediateIdx() > 0);
-      Type T = Def->getParamType(Def->getImmediateIdx() - 1);
-      UpperBound = utostr(T.getNumElements() - 1);
-    }
+    // If the Def has a body (operation DAGs), it is not a __builtin_neon_
+    if(Def->hasBody()) continue;
 
-    // Calculate the index of the immediate that should be range checked.
-    unsigned Idx = Def->getNumParams();
-    if (Def->hasImmediate())
-      Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
-
-    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
-       << "i = " << Idx << ";";
-    if (!LowerBound.empty())
-      OS << " l = " << LowerBound << ";";
-    if (!UpperBound.empty())
-      OS << " u = " << UpperBound << ";";
-    OS << " break;\n";
+    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
+    
+    for(const auto &Check: Def->getImmChecks()){ // one push_back per check
+      EltType = std::get<2>(Check);   // EltSizeArg: parameter index, or negative for none
+      if(EltType >= 0)  // translate the parameter index into that parameter's Neon type enum
+        EltType = Def->getParamType(EltType).getNeonEnum();
 
+      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check) <<
+                ", " << std::get<1>(Check) <<  ", " << EltType << ")); \n";
+    }
+    OS << "  break;\n"; // one break per case, after all checks, so none is skipped and no case falls through
     Emitted.insert(Def->getMangledName());
   }
 
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index caedd5978a87c0..027aa4b4c6bb29 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1573,7 +1573,7 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) {
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";
 
-  OS << "#ifdef LLVM_GET_SVE_IMMCHECKTYPES\n";
+  OS << "#ifdef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES\n";
   for (auto &KV : ImmCheckTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";

>From 3307306b7f64fdf3674bc757f848b0046e54fa22 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 23 Jul 2024 22:28:48 +0000
Subject: [PATCH 02/13] Updated/consistent vcmla codegen tests

---
 clang/test/CodeGen/aarch64-neon-vcmla.c | 610 +++++++++++++++---------
 1 file changed, 384 insertions(+), 226 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index 2ff48fd97b4271..d82d74d019c012 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -1,438 +1,596 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon \
-// RUN:        -target-feature +v8.3a \
-// RUN:        -target-feature +fullfp16 \
-// RUN:        -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon  \
+// RUN:            -target-feature +v8.3a -target-feature +fullfp16 \
+// RUN:            -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
 
 // REQUIRES: aarch64-registered-target
 
 #include <arm_neon.h>
 
-// CHECK-LABEL: @test_vcmla_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
+//
 float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
+//
 float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
+//
 float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
+//
 float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_F643_I]]
+//
 float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
 float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
 float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
 float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
 float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT90_F643_I]]
+//
 float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot90_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
 float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
 float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
 float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
 float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT180_F643_I]]
+//
 float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot180_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
 float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
 float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
 float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
 float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT270_F643_I]]
+//
 float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot270_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
+//
 float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_laneq_f16(
-// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK:  %vcmla_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> 
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
+//
 float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_laneq_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
+//
 float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
+//
 float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
+//
 float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
+//
 float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
+//
 float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
+//
 float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
 float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: %vcmla_rot90_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK: ret <4 x half> %vcmla_rot90_f163.i
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
 float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
 float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
 float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
 float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
 float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
 float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
 float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
 float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
-// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK:  %vcmla_rot180_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK:  ret <4 x half> %vcmla_rot180_f163.i
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
 float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
 float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
 float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
 float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
 float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
 float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
 float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
 float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
-// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK:  %vcmla_rot270_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK:  ret <4 x half> %vcmla_rot270_f163.
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
 float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
 float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
 float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
 float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
 float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[DUP]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
 float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
 float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1);
 }

>From eeec94967b7b93aa0cd62f48bbda8ffb695419eb Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 24 Jul 2024 11:33:27 +0000
Subject: [PATCH 03/13] Run clang-format

---
 clang/include/clang/Basic/TargetBuiltins.h | 51 ++++++------
 clang/include/clang/Sema/SemaARM.h         |  4 +-
 clang/lib/Sema/SemaARM.cpp                 | 93 +++++++++++-----------
 clang/utils/TableGen/NeonEmitter.cpp       | 54 ++++++++-----
 4 files changed, 106 insertions(+), 96 deletions(-)

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 50e17ad7e16284..384811f9281ac6 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -209,7 +209,7 @@ namespace clang {
         Flags |= QuadFlag;
     }
 
-    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); } 
+    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
     bool isPoly() const {
       EltType ET = getEltType();
       return ET == Poly8 || ET == Poly16 || ET == Poly64;
@@ -217,36 +217,36 @@ namespace clang {
     bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
     bool isQuad() const { return (Flags & QuadFlag) != 0; };
     unsigned getEltSizeInBits() const {
-      switch(getEltType()){
-        case Int8:
-        case Poly8:
-          return 8;
-        case Int16:
-        case Float16:
-        case Poly16:
-        case BFloat16:
-          return 16;
-        case Int32:
-        case Float32:
-          return 32;
-        case Int64:
-        case Float64:
-        case Poly64:
-          return 64;
-        case Poly128:
-          return 128;
-        default:
-          llvm_unreachable("Invalid NeonTypeFlag!");
+      switch (getEltType()) {
+      case Int8:
+      case Poly8:
+        return 8;
+      case Int16:
+      case Float16:
+      case Poly16:
+      case BFloat16:
+        return 16;
+      case Int32:
+      case Float32:
+        return 32;
+      case Int64:
+      case Float64:
+      case Poly64:
+        return 64;
+      case Poly128:
+        return 128;
+      default:
+        llvm_unreachable("Invalid NeonTypeFlag!");
       }
     }
   };
 
-    // Shared between SVE/SME and NEON
-    enum ArmImmCheckType {
+  // Shared between SVE/SME and NEON
+  enum ArmImmCheckType {
 #define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
 #include "clang/Basic/arm_sve_typeflags.inc"
-#undef  LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
-    };
+#undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+  };
 
   /// Flags to identify the types for overloaded SVE builtins.
   class SVETypeFlags {
@@ -279,7 +279,6 @@ namespace clang {
 #undef LLVM_GET_SVE_MERGETYPES
     };
 
-
     SVETypeFlags(uint64_t F) : Flags(F) {
       EltTypeShift = llvm::countr_zero(EltTypeMask);
       MemEltTypeShift = llvm::countr_zero(MemEltTypeMask);
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index 1ced84300c1794..2f13e60f081c52 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -41,8 +41,8 @@ class SemaARM : public SemaBase {
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
-  bool ParseNeonImmChecks(CallExpr *TheCall, 
-                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks, 
+  bool ParseNeonImmChecks(CallExpr *TheCall,
+                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
                           int OverloadType);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 8f4d94e1df6788..bbcca1c72465ab 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -369,62 +369,63 @@ enum ArmSMEState : unsigned {
   ArmZT0Mask = 0b11 << 2
 };
 
-bool SemaARM::ParseNeonImmChecks(CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> 
-                                &ImmChecks, int OverloadType = -1) {
+bool SemaARM::ParseNeonImmChecks(
+    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+    int OverloadType = -1) {
   int ArgIdx, CheckTy, ElementType;
   bool hasError = false;
 
   for (auto &I : ImmChecks) {
     std::tie(ArgIdx, CheckTy, ElementType) = I;
 
-    NeonTypeFlags Type = (OverloadType != -1) ? 
-                          NeonTypeFlags(OverloadType) : NeonTypeFlags(ElementType); 
-          
-    switch((ArmImmCheckType)CheckTy) {
-      case ArmImmCheckType::ImmCheck0_3:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
-        break;
-      case ArmImmCheckType::ImmCheck0_63:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
-        break;
-      case ArmImmCheckType::ImmCheck0_7:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
-        break;
-      case ArmImmCheckType::ImmCheck1_16:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
-        break;
-      case ArmImmCheckType::ImmCheck1_32:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
-        break;
-      case ArmImmCheckType::ImmCheck1_64:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
-        break;
-      case ArmImmCheckType::ImmCheckLaneIndex:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,  (64 << Type.isQuad()) / 
-                                                    Type.getEltSizeInBits() - 1);
-        break; 
-      case ArmImmCheckType::ImmCheckLaneQIndex:    // force to use quad
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 
-                                                    (128/Type.getEltSizeInBits()) - 1);
-        break;
-      case ArmImmCheckType::ImmCheckShiftLeft:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 
-                                                    Type.getEltSizeInBits() - 1);
-        break;
-      case ArmImmCheckType::ImmCheckShiftRight:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 
-                                                    1, Type.getEltSizeInBits());
-        break;
-      default:
-        llvm_unreachable("Invalid Neon immediate range typeflag!");
-        break;
+    NeonTypeFlags Type = (OverloadType != -1) ? NeonTypeFlags(OverloadType)
+                                              : NeonTypeFlags(ElementType);
+
+    switch ((ArmImmCheckType)CheckTy) {
+    case ArmImmCheckType::ImmCheck0_3:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
+      break;
+    case ArmImmCheckType::ImmCheck0_63:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
+      break;
+    case ArmImmCheckType::ImmCheck0_7:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
+      break;
+    case ArmImmCheckType::ImmCheck1_16:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
+      break;
+    case ArmImmCheckType::ImmCheck1_32:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
+      break;
+    case ArmImmCheckType::ImmCheck1_64:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
+      break;
+    case ArmImmCheckType::ImmCheckLaneIndex:
+      hasError |= SemaRef.BuiltinConstantArgRange(
+          TheCall, ArgIdx, 0,
+          (64 << Type.isQuad()) / Type.getEltSizeInBits() - 1);
+      break;
+    case ArmImmCheckType::ImmCheckLaneQIndex: // force to use quad
+      hasError |= SemaRef.BuiltinConstantArgRange(
+          TheCall, ArgIdx, 0, (128 / Type.getEltSizeInBits()) - 1);
+      break;
+    case ArmImmCheckType::ImmCheckShiftLeft:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                                  Type.getEltSizeInBits() - 1);
+      break;
+    case ArmImmCheckType::ImmCheckShiftRight:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1,
+                                                  Type.getEltSizeInBits());
+      break;
+    default:
+      llvm_unreachable("Invalid Neon immediate range typeflag!");
+      break;
     }
   }
 
   return hasError;
 }
 
-
 bool SemaARM::ParseSVEImmChecks(
     CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
   // Perform all the immediate checks for this builtin call.
@@ -833,8 +834,8 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 #include "clang/Basic/arm_fp16.inc"
 #include "clang/Basic/arm_neon.inc"
 #undef GET_NEON_IMMEDIATE_CHECK
-    }
-    
+  }
+
   return ParseNeonImmChecks(TheCall, ImmChecks, TV);
 }
 
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 7666b53000edc6..8dc6312525cf55 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -370,10 +370,13 @@ class Intrinsic {
 
 public:
   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
-            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
-            StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
-      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks), CK(CK), Body(Body),
-        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
+            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks,
+            ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+            StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable,
+            bool BigEndianSafe)
+      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks),
+        CK(CK), Body(Body), ArchGuard(ArchGuard.str()),
+        TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
         BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
         UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
         Emitter(Emitter) {
@@ -416,14 +419,14 @@ class Intrinsic {
   /// Get the architectural guard string (#ifdef).
   std::string getArchGuard() const { return ArchGuard; }
   std::string getTargetGuard() const { return TargetGuard; }
-  ArrayRef<std::tuple<int, int, int>> getImmChecks() const {return ImmChecks; }
+  ArrayRef<std::tuple<int, int, int>> getImmChecks() const { return ImmChecks; }
   /// Get the non-mangled name.
   std::string getName() const { return Name; }
 
   /// Return true if the intrinsic takes an immediate operand.
   bool hasImmediate() const {
     return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
-    //return !ImmChecks.empty();
+    // return !ImmChecks.empty();
   }
 
   // Return if the supplied argument is an immediate
@@ -558,6 +561,7 @@ class NeonEmitter {
   void genNeonImmCheckTypes(raw_ostream &OS);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
+
 public:
   /// Called by Intrinsic - this attempts to get an intrinsic that takes
   /// the given types as arguments.
@@ -1032,7 +1036,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
     if (LocalCK == ClassI && T.isInteger())
       T.makeSigned();
 
-    if(isArgImmediate(I))
+    if (isArgImmediate(I))
       T.makeImmediate(32);
 
     S += T.builtin_str();
@@ -1953,12 +1957,13 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
-  std::vector<Record*> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+  std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
 
   SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
-  for(const auto *R: ImmCheckList) {
+  for (const auto *R : ImmCheckList) {
 
-    ImmChecks.push_back(std::make_tuple(R->getValueAsInt("Arg"), 
+    ImmChecks.push_back(
+        std::make_tuple(R->getValueAsInt("Arg"),
                         R->getValueAsDef("Kind")->getValueAsInt("Value"),
                         R->getValueAsInt("EltSizeArg")));
   }
@@ -2003,8 +2008,9 @@ void NeonEmitter::createIntrinsic(Record *R,
   auto &Entry = IntrinsicMap[Name];
 
   for (auto &I : NewTypeSpecs) {
-    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body, *this,
-                       ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
+    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body,
+                       *this, ArchGuard, TargetGuard, IsUnavailable,
+                       BigEndianSafe);
     Out.push_back(&Entry.back());
   }
 
@@ -2157,34 +2163,38 @@ void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
   OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
 
   for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
-    OS << "  " << RV->getNameInitAsString() << " = " << RV->getValueAsInt("Value") << ",\n";
+    OS << "  " << RV->getNameInitAsString() << " = "
+       << RV->getValueAsInt("Value") << ",\n";
   }
 
   OS << "#endif\n\n";
 }
 
-void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
+void NeonEmitter::genIntrinsicRangeCheckCode(
+    raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   int EltType;
   // Ensure these are only emitted once.
   std::set<std::string> Emitted;
 
   for (auto &Def : Defs) {
-    if (Emitted.find(Def->getMangledName()) != Emitted.end() || !Def->hasImmediate())
+    if (Emitted.find(Def->getMangledName()) != Emitted.end() ||
+        !Def->hasImmediate())
       continue;
 
     // If the Def has a body (operation DAGs), it is not a __builtin_neon_
-    if(Def->hasBody()) continue;
+    if (Def->hasBody())
+      continue;
 
     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
-    
-    for(const auto &Check: Def->getImmChecks()){
-      EltType = std::get<2>(Check);   // elt type argument
-      if(EltType >= 0)
+
+    for (const auto &Check : Def->getImmChecks()) {
+      EltType = std::get<2>(Check); // elt type argument
+      if (EltType >= 0)
         EltType = Def->getParamType(EltType).getNeonEnum();
 
-      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check) << 
-                ", " << std::get<1>(Check) <<  ", " << EltType << ")); \n";
+      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check)
+         << ", " << std::get<1>(Check) << ", " << EltType << ")); \n";
       OS << "  break;\n";
     }
     Emitted.insert(Def->getMangledName());

>From cd38a9679536b721cd5d1282bd8a3156be5f24a9 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Mon, 29 Jul 2024 11:47:31 +0000
Subject: [PATCH 04/13] Move immediate check types to own generated file

---
 clang/include/clang/Basic/CMakeLists.txt   | 3 +++
 clang/include/clang/Basic/TargetBuiltins.h | 2 +-
 clang/utils/TableGen/SveEmitter.cpp        | 9 +++++++++
 clang/utils/TableGen/TableGen.cpp          | 7 +++++++
 clang/utils/TableGen/TableGenBackends.h    | 1 +
 llvm/docs/CommandGuide/tblgen.rst          | 4 ++++
 6 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 2ef6ddc68f4bf3..f069f4fc118f27 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -67,6 +67,9 @@ clang_tablegen(arm_neon.inc -gen-arm-neon-sema
 clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
   SOURCE arm_fp16.td
   TARGET ClangARMFP16)
+clang_tablegen(arm_immcheck_types.inc -gen-arm-immcheck-types
+  SOURCE arm_sve.td
+  TARGET ClangARMImmChecks)
 clang_tablegen(arm_mve_builtins.inc -gen-arm-mve-builtin-def
   SOURCE arm_mve.td
   TARGET ClangARMMveBuiltinsDef)
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 384811f9281ac6..15757e43f4a5e5 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -244,7 +244,7 @@ namespace clang {
   // Shared between SVE/SME and NEON
   enum ArmImmCheckType {
 #define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
-#include "clang/Basic/arm_sve_typeflags.inc"
+#include "clang/Basic/arm_immcheck_types.inc"
 #undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
   };
 
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 027aa4b4c6bb29..897fd24bd75cc0 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -388,6 +388,9 @@ class SVEEmitter {
   /// Emit all the range checks for the immediates.
   void createRangeChecks(raw_ostream &o);
 
+  // Emit all the ImmCheckTypes to arm_immcheck_types.inc
+  void createImmCheckTypes(raw_ostream &OS);
+
   /// Create the SVETypeFlags used in CGBuiltins
   void createTypeFlags(raw_ostream &o);
 
@@ -1572,7 +1575,9 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) {
   for (auto &KV : MergeTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";
+}
 
+void SVEEmitter::createImmCheckTypes(raw_ostream &OS) {
   OS << "#ifdef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES\n";
   for (auto &KV : ImmCheckTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
@@ -1858,6 +1863,10 @@ void EmitSveTypeFlags(RecordKeeper &Records, raw_ostream &OS) {
   SVEEmitter(Records).createTypeFlags(OS);
 }
 
+void EmitImmCheckTypes(RecordKeeper &Records, raw_ostream &OS) {
+  SVEEmitter(Records).createImmCheckTypes(OS);
+}
+
 void EmitSveStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
   SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SVE);
 }
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 42cc704543f18e..dab447ff7d944f 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -75,6 +75,7 @@ enum ActionType {
   GenArmVectorType,
   GenArmNeonSema,
   GenArmNeonTest,
+  GenArmImmCheckTypes,
   GenArmMveHeader,
   GenArmMveBuiltinDef,
   GenArmMveBuiltinSema,
@@ -234,6 +235,9 @@ cl::opt<ActionType> Action(
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
                    "Generate ARM NEON tests for clang"),
+        clEnumValN(GenArmImmCheckTypes, "gen-arm-immcheck-types",
+                   "Generate arm_immchecktypes.h (immediate range check types)"
+                   " for clang"),
         clEnumValN(GenArmSveHeader, "gen-arm-sve-header",
                    "Generate arm_sve.h for clang"),
         clEnumValN(GenArmSveBuiltins, "gen-arm-sve-builtins",
@@ -469,6 +473,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenArmNeonTest:
     EmitNeonTest(Records, OS);
     break;
+  case GenArmImmCheckTypes:
+    EmitImmCheckTypes(Records, OS);
+    break;
   case GenArmMveHeader:
     EmitMveHeader(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 5f2dd257cb90a9..3a424c9c91fe71 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -108,6 +108,7 @@ void EmitNeonSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitVectorTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitNeonTest(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
+void EmitImmCheckTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
diff --git a/llvm/docs/CommandGuide/tblgen.rst b/llvm/docs/CommandGuide/tblgen.rst
index aa4c8e17864050..92186579e682df 100644
--- a/llvm/docs/CommandGuide/tblgen.rst
+++ b/llvm/docs/CommandGuide/tblgen.rst
@@ -276,6 +276,10 @@ clang-tblgen Options
 
   Generate ARM NEON tests for Clang.
 
+.. option:: -gen-arm-immcheck-types
+
+  Generate ``arm_immcheck_types.inc`` for Clang.
+
 .. option:: -gen-arm-sve-header
 
   Generate ``arm_sve.h`` for Clang.

>From 76bbd0cca741398bfd04ed8550450f617f5d91ea Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Fri, 9 Aug 2024 10:15:27 +0000
Subject: [PATCH 05/13] Refactor immediate range checking change

For each immediate, a NEON instruction must define
  - The index of the immediate argument to be checked

  - The type of immediate range check to be performed,
    (e.g., ImmCheckShiftRight)

  - The index of the argument whose type defines the context
    of this immediate check (base type, vector size).

	- Note that if this definition generates a polymorphic
	  NEON builtin, the base type defined by this argument is
	  overwritten by that of the type code supplied to the
	  overloaded builtin call. This third argument is omitted
	  in some cases due to this.

- Example
 - vfma_laneq
 (https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiessimdisa=[Neon]&q=vfma_laneq)
	- The immediate is supplied in argument 3.
	- The immediate is used as an index into the lanes of argument 2
	- So we must perform an immediate check on argument 3, based on
	  the number of lanes in argument 2.
	- ImmCheck<3, ImmCheckLaneIndex, 2>

- Changes
 - include/clang/Basic/arm_neon.td
	- Fix incorrect immediate range specifiers
	- Replace ImmCheckLaneQIndex occurrences with
	  ImmCheckLaneIndex (vector width is now inferred
	  from the type context argument).
 - include/clang/Basic/arm_fp16.td
	- fix immediate range of fcvt instructions
 - include/clang/Basic/arm_immcheck_incl.td
	- Remove ImmCheckLaneQIndex
	- Add ImmCheckCvt
 - include/clang/Basic/TargetBuiltins.h
	- revert renaming of ImmCheckTypes

 - utils/TableGen/NeonEmitter.cpp
	- Share ImmCheck class with SVEEmitter
	- Move parsing of ImmCheck list to constructor so that
	  TypeContextArg can be understood.
 - utils/TableGen/SveEmitter.cpp
	- Reflect renaming of 'EltSizeArg' to 'TypeContextArg'.

 - Addition
  - llvm/include/llvm/TableGen/AArch64ImmCheck.h
	- move ImmCheck cpp class so that it can be shared
	  by NeonEmitter and SVEEmitter.
---
 clang/include/clang/Basic/TargetBuiltins.h    |   2 +-
 clang/include/clang/Basic/arm_fp16.td         |  15 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |   6 +-
 clang/include/clang/Basic/arm_neon.td         |  83 +++--
 clang/include/clang/Sema/SemaARM.h            |  15 +-
 clang/lib/Sema/SemaARM.cpp                    | 350 +++++++++---------
 clang/utils/TableGen/NeonEmitter.cpp          |  80 ++--
 clang/utils/TableGen/SveEmitter.cpp           |  21 +-
 llvm/include/llvm/TableGen/AArch64ImmCheck.h  |  37 ++
 9 files changed, 309 insertions(+), 300 deletions(-)
 create mode 100644 llvm/include/llvm/TableGen/AArch64ImmCheck.h

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 15757e43f4a5e5..a85e070cad0383 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -242,7 +242,7 @@ namespace clang {
   };
 
   // Shared between SVE/SME and NEON
-  enum ArmImmCheckType {
+  enum ImmCheckType {
 #define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
 #include "clang/Basic/arm_immcheck_types.inc"
 #undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index 42228a3ba1ffad..81d257fc73033e 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -81,12 +81,17 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
     def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
     def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
     def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">;
-    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">;
-    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">;
-    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">;
-    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">;
-    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">;
   }
+    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
   // Comparison
   def SCALAR_CMEQRH   : SInst<"vceq", "(1U)11", "Sh">;
   def SCALAR_CMEQZH   : SInst<"vceqz", "(1U)1", "Sh">;
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
index 3b20248f650400..88440532799839 100644
--- a/clang/include/clang/Basic/arm_immcheck_incl.td
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -11,7 +11,7 @@ def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
 def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
 def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
 def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneQIndex          : ImmCheckType<8>;  // (Neon) treat type as Quad
+def ImmCheckCvt                 : ImmCheckType<8>;  // 1..sizeinbits(elt) (same as ShiftRight)
 def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(128/(2*sizeinbits(elt)) - 1)
 def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(128/(4*sizeinbits(elt)) - 1)
 def ImmCheckComplexRot90_270    : ImmCheckType<11>; // [90,270]
@@ -31,9 +31,9 @@ def ImmCheck1_32                : ImmCheckType<24>; // 1..32
 def ImmCheck1_64                : ImmCheckType<25>; // 1..64
 def ImmCheck0_63                : ImmCheckType<26>; // 0..63
 
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
+class ImmCheck<int arg, ImmCheckType kind, int typeArg = -1> {
   int Arg = arg;
   // The index of the argument whose type should be referred to when validating this immedaite.
-  int EltSizeArg = eltSizeArg;
+  int TypeContextArg = typeArg;
   ImmCheckType Kind = kind;
 }
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index ee823f6ef68139..c77b428c3e92b5 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -288,14 +288,14 @@ def SPLAT  : WInst<"splat_lane", ".(!q)I",
                     [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
                    "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
-                   [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
+                   [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
   let isLaneQ = 1;
 }
 let TargetGuard = "bf16,neon" in {
   def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb", 
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", 
-                      [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
     let isLaneQ = 1;
   }
 }
@@ -421,6 +421,12 @@ def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl", 
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
+
+// Narrowing right shifts should have an immediate range of 1..(sizeinbits(arg)/2).
+// However, as the overloaded type code that is supplied to a polymorphic builtin
+// is that of the return type (half as wide as the argument in this case), using
+// ImmCheckShiftRightNarrow would result in an upper bound of (sizeinbits(arg)/2)/2.
+// ImmCheckShiftRight produces the correct behavior here.
 def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl", 
                       [ImmCheck<1, ImmCheckShiftRight>]>;
 def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil", 
@@ -433,6 +439,12 @@ def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
 def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl", 
                       [ImmCheck<1, ImmCheckShiftRight>]>;
+
+// Widening left-shifts should have a range of 0..(sizeinbits(arg)-1).
+// This polymorphic builtin is supplied the wider return type as its overloaded
+// base type, so the range here is actually 0..(sizeinbits(arg)*2 - 1).
+// This cannot be rectified currently due to a use of vshll_n_s16 with an
+// out-of-bounds immediate in the definition of vcvt_f32_bf16.
 def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi", 
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 
@@ -479,14 +491,14 @@ def VLD1_X2_F16   : WInst<"vld1_x2", "2(c*!)", "hQh">;
 def VLD1_X3_F16   : WInst<"vld1_x3", "3(c*!)", "hQh">;
 def VLD1_X4_F16   : WInst<"vld1_x4", "4(c*!)", "hQh">;
 def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh", 
-                          [ImmCheck<2, ImmCheck0_3, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP_F16  : WInst<"vld1_dup", ".(c*!)", "hQh">;
 def VST1_F16      : WInst<"vst1", "v*(.!)", "hQh">;
 def VST1_X2_F16   : WInst<"vst1_x2", "v*(2!)", "hQh">;
 def VST1_X3_F16   : WInst<"vst1_x3", "v*(3!)", "hQh">;
 def VST1_X4_F16   : WInst<"vst1_x4", "v*(4!)", "hQh">;
 def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh", 
-                          [ImmCheck<2, ImmCheck0_3, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -523,20 +535,20 @@ def VLD2_DUP_F16  : WInst<"vld2_dup", "2(c*!)", "hQh">;
 def VLD3_DUP_F16  : WInst<"vld3_dup", "3(c*!)", "hQh">;
 def VLD4_DUP_F16  : WInst<"vld4_dup", "4(c*!)", "hQh">;
 def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh", 
-                          [ImmCheck<4, ImmCheck0_3, 1>]>;
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
 def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh", 
-                          [ImmCheck<5, ImmCheck0_3, 1>]>;
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh", 
-                          [ImmCheck<6, ImmCheck0_3, 1>]>;
+                          [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2_F16      : WInst<"vst2", "v*(2!)", "hQh">;
 def VST3_F16      : WInst<"vst3", "v*(3!)", "hQh">;
 def VST4_F16      : WInst<"vst4", "v*(4!)", "hQh">;
 def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh", 
-                          [ImmCheck<3, ImmCheck0_3, 1>]>;
+                          [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh", 
-                         [ImmCheck<4, ImmCheck0_3, 1>]>;
+                         [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
 def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh", 
-                          [ImmCheck<5, ImmCheck0_3, 1>]>;
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -655,9 +667,9 @@ def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
 def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi", 
-                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi", 
-                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
 let TargetGuard = "v8.1a,neon" in {
@@ -1072,9 +1084,9 @@ def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
 }
 
 def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd", 
-                        [ImmCheck<3, ImmCheckLaneIndex, 0>]>;
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd", 
-                        [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
@@ -1151,9 +1163,9 @@ def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
 
 let isLaneQ = 1 in {
 def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
-                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
-                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
 def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
@@ -1425,16 +1437,16 @@ def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl",
 let isScalarNarrowShift = 1 in {
   // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
   def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
   def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Shift Right Unsigned Narrow (Immediate)
   def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
   def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1660,11 +1672,11 @@ def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
 def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d", 
-                            [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+                            [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
 def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
-                              [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
   let isLaneQ = 1;
 }
 
@@ -1680,7 +1692,7 @@ def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ
 def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
-                            [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1700,15 +1712,15 @@ def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR
 def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
-                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
 def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
-                              [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+                              [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
-                              [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                              [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1741,7 +1753,7 @@ def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
-                            [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
   let isLaneQ = 1;
 }
 
@@ -1901,7 +1913,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
-                          [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
     let isLaneQ = 1;
   }
 
@@ -1911,7 +1923,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
-                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
     let isLaneQ = 1;
   }
 
@@ -1948,7 +1960,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
-                                [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
     let isLaneQ = 1;
   }
 
@@ -1971,7 +1983,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh", 
                                 [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh", 
-                                [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
     let isLaneQ = 1;
   }
 }
@@ -2063,10 +2075,9 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
 
     let isLaneQ = 1 in  {
-      // vcmla{ROT}_laneq
-      // ACLE specifies that the fp16 vcmla_#ROT_laneq variant has an immedaite range of 0 <= lane <= 1.
-      // fp16 is the only variant for which these two differ.
-      // https://developer.arm.com/documentation/ihi0073/latest/ 
+      // ACLE specifies that the f16 variant of vcmla_#ROT_laneq has an immediate range 0<=lane<=1,
+      // whereas the f16 variant of vcmlaq_#ROT_laneq has an immediate range 0<=lane<=3.
+      // f16 is the only type for which these two differ.
       defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
       def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
                 (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>;
@@ -2126,7 +2137,7 @@ let TargetGuard = "bf16,neon" in {
   def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
-                          [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
     let isLaneQ = 1;
   }
 
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index 2f13e60f081c52..c7f4abb822e2f3 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -15,6 +15,7 @@
 
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/Expr.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/SemaBase.h"
 #include "llvm/ADT/SmallVector.h"
@@ -37,18 +38,20 @@ class SemaARM : public SemaBase {
                             /// flags. Do Sema checks for the runtime mode.
   };
 
+  bool CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy, unsigned ArgIdx,
+                         unsigned EltBitWidth, unsigned VecBitWidth);
   bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
-  bool ParseNeonImmChecks(CallExpr *TheCall,
-                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
-                          int OverloadType);
+  bool
+  ParseNeonImmChecks(CallExpr *TheCall,
+                     SmallVector<std::tuple<int, int, int, int>, 2> &ImmChecks,
+                     int OverloadType);
+  bool ParseSVEImmChecks(CallExpr *TheCall,
+                         SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
-  bool
-  ParseSVEImmChecks(CallExpr *TheCall,
-                    llvm::SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
   bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                    CallExpr *TheCall);
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index bbcca1c72465ab..f6e00f957a51f4 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -12,7 +12,6 @@
 
 #include "clang/Sema/SemaARM.h"
 #include "clang/Basic/DiagnosticSema.h"
-#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
@@ -369,202 +368,183 @@ enum ArmSMEState : unsigned {
   ArmZT0Mask = 0b11 << 2
 };
 
+bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
+                                unsigned ArgIdx, unsigned EltBitWidth,
+                                unsigned VecBitWidth) {
+
+  typedef bool (*OptionSetCheckFnTy)(int64_t Value);
+
+  // Function that checks whether the operand (ArgIdx) is an immediate
+  // that is one of the predefined values.
+  auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+                                 int ErrDiag) -> bool {
+    // We can't check the value of a dependent argument.
+    Expr *Arg = TheCall->getArg(ArgIdx);
+    if (Arg->isTypeDependent() || Arg->isValueDependent())
+      return false;
+
+    // Check constant-ness first.
+    llvm::APSInt Imm;
+    if (SemaRef.BuiltinConstantArg(TheCall, ArgIdx, Imm))
+      return true;
+
+    if (!CheckImm(Imm.getSExtValue()))
+      return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+    return false;
+  };
+
+  switch ((ImmCheckType)CheckTy) {
+  case ImmCheckType::ImmCheck0_31:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 31))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_13:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 13))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_63:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_16:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_7:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_1:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_3:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 3))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_7:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 7))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckExtract:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (2048 / EltBitWidth) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckCvt:
+  case ImmCheckType::ImmCheckShiftRight:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, EltBitWidth))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckShiftRightNarrow:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, EltBitWidth / 2))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckShiftLeft:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, EltBitWidth - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndex:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (1 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndexCompRotate:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (2 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndexDot:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (4 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckComplexRot90_270:
+    if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
+                            diag::err_rotation_argument_to_cadd))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckComplexRotAll90:
+    if (CheckImmediateInSet(
+            [](int64_t V) { return V == 0 || V == 90 || V == 180 || V == 270; },
+            diag::err_rotation_argument_to_cmla))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_1:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_2:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 2))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_3:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_0:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 0))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_15:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 15))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_255:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 255))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_32:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_64:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck2_4_Mul2:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 2, 4) ||
+        SemaRef.BuiltinConstantArgMultiple(TheCall, ArgIdx, 2))
+      return true;
+    break;
+  default:
+    llvm_unreachable("Invalid immediate range typeflag!");
+    break;
+  }
+  return false;
+}
+
 bool SemaARM::ParseNeonImmChecks(
-    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+    CallExpr *TheCall,
+    SmallVector<std::tuple<int, int, int, int>, 2> &ImmChecks,
     int OverloadType = -1) {
-  int ArgIdx, CheckTy, ElementType;
-  bool hasError = false;
+  unsigned CheckTy;
+  unsigned ArgIdx, ElementSizeInBits, VecSizeInBits;
+  bool HasError = false;
 
-  for (auto &I : ImmChecks) {
-    std::tie(ArgIdx, CheckTy, ElementType) = I;
+  for (const auto &I : ImmChecks) {
+    std::tie(ArgIdx, CheckTy, ElementSizeInBits, VecSizeInBits) = I;
 
-    NeonTypeFlags Type = (OverloadType != -1) ? NeonTypeFlags(OverloadType)
-                                              : NeonTypeFlags(ElementType);
+    if (OverloadType >= 0)
+      ElementSizeInBits = NeonTypeFlags(OverloadType).getEltSizeInBits();
 
-    switch ((ArmImmCheckType)CheckTy) {
-    case ArmImmCheckType::ImmCheck0_3:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
-      break;
-    case ArmImmCheckType::ImmCheck0_63:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
-      break;
-    case ArmImmCheckType::ImmCheck0_7:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
-      break;
-    case ArmImmCheckType::ImmCheck1_16:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
-      break;
-    case ArmImmCheckType::ImmCheck1_32:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
-      break;
-    case ArmImmCheckType::ImmCheck1_64:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndex:
-      hasError |= SemaRef.BuiltinConstantArgRange(
-          TheCall, ArgIdx, 0,
-          (64 << Type.isQuad()) / Type.getEltSizeInBits() - 1);
-      break;
-    case ArmImmCheckType::ImmCheckLaneQIndex: // force to use quad
-      hasError |= SemaRef.BuiltinConstantArgRange(
-          TheCall, ArgIdx, 0, (128 / Type.getEltSizeInBits()) - 1);
-      break;
-    case ArmImmCheckType::ImmCheckShiftLeft:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
-                                                  Type.getEltSizeInBits() - 1);
-      break;
-    case ArmImmCheckType::ImmCheckShiftRight:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1,
-                                                  Type.getEltSizeInBits());
-      break;
-    default:
-      llvm_unreachable("Invalid Neon immediate range typeflag!");
-      break;
-    }
+    HasError |= CheckImmediateArg(TheCall, CheckTy, ArgIdx, ElementSizeInBits,
+                                  VecSizeInBits);
   }
 
-  return hasError;
+  return HasError;
 }
 
 bool SemaARM::ParseSVEImmChecks(
     CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
-  // Perform all the immediate checks for this builtin call.
-  bool HasError = false;
-  for (auto &I : ImmChecks) {
-    int ArgNum, CheckTy, ElementSizeInBits;
-    std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
-
-    typedef bool (*OptionSetCheckFnTy)(int64_t Value);
-
-    // Function that checks whether the operand (ArgNum) is an immediate
-    // that is one of the predefined values.
-    auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
-                                   int ErrDiag) -> bool {
-      // We can't check the value of a dependent argument.
-      Expr *Arg = TheCall->getArg(ArgNum);
-      if (Arg->isTypeDependent() || Arg->isValueDependent())
-        return false;
-
-      // Check constant-ness first.
-      llvm::APSInt Imm;
-      if (SemaRef.BuiltinConstantArg(TheCall, ArgNum, Imm))
-        return true;
 
-      if (!CheckImm(Imm.getSExtValue()))
-        return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
-      return false;
-    };
+  bool HasError = false;
+  unsigned CheckTy, ArgIdx, ElementSizeInBits;
 
-    switch ((ArmImmCheckType)CheckTy) {
-    case ArmImmCheckType::ImmCheck0_31:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_13:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_16:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_7:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_1:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_3:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_7:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckExtract:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (2048 / ElementSizeInBits) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckShiftRight:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
-                                          ElementSizeInBits))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckShiftRightNarrow:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
-                                          ElementSizeInBits / 2))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckShiftLeft:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          ElementSizeInBits - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndex:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (1 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndexCompRotate:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (2 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndexDot:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (4 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckComplexRot90_270:
-      if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
-                              diag::err_rotation_argument_to_cadd))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckComplexRotAll90:
-      if (CheckImmediateInSet(
-              [](int64_t V) {
-                return V == 0 || V == 90 || V == 180 || V == 270;
-              },
-              diag::err_rotation_argument_to_cmla))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_1:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_2:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_3:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_0:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_15:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_255:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck2_4_Mul2:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
-          SemaRef.BuiltinConstantArgMultiple(TheCall, ArgNum, 2))
-        HasError = true;
-      break;
-    default:
-      llvm_unreachable("Invalid SVE immediate range typeflag!");
-      break;
-    }
+  for (const auto &I : ImmChecks) {
+    std::tie(ArgIdx, CheckTy, ElementSizeInBits) = I;
+    HasError |=
+        CheckImmediateArg(TheCall, CheckTy, ArgIdx, ElementSizeInBits, 128);
   }
 
   return HasError;
@@ -826,7 +806,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   // For NEON intrinsics which take an immediate value as part of the
   // instruction, range check them here.
-  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
+  SmallVector<std::tuple<int, int, int, int>, 2> ImmChecks;
   switch (BuiltinID) {
   default:
     return false;
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 8dc6312525cf55..c790994665accd 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/AArch64ImmCheck.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/SetTheory.h"
@@ -334,7 +335,7 @@ class Intrinsic {
   /// The types of return value [0] and parameters [1..].
   std::vector<Type> Types;
 
-  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
+  SmallVector<ImmCheck, 2> ImmChecks;
   /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
   int PolymorphicKeyType;
   /// The local variables defined.
@@ -370,16 +371,14 @@ class Intrinsic {
 
 public:
   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
-            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks,
-            ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
             StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable,
             bool BigEndianSafe)
-      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks),
-        CK(CK), Body(Body), ArchGuard(ArchGuard.str()),
-        TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
-        BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
-        UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
-        Emitter(Emitter) {
+      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
+        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
+        IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
+        PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
+        BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
     // Modify the TypeSpec per-argument to get a concrete Type, and create
     // known variables for each.
     // Types[0] is the return value.
@@ -408,6 +407,26 @@ class Intrinsic {
           (Type.isScalar() && Type.isHalf()))
         UseMacro = true;
     }
+
+    int ArgIdx, Kind, TypeArgIdx;
+    std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+    for (const auto *I : ImmCheckList) {
+      unsigned EltSizeInBits = 0, VecSizeInBits = 0;
+
+      ArgIdx = I->getValueAsInt("Arg");
+      TypeArgIdx = I->getValueAsInt("TypeContextArg");
+      Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
+
+      assert((ArgIdx >= 0 && Kind >= 0) && "Arg and Kind must be nonnegative");
+
+      if (TypeArgIdx >= 0) {
+        EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
+        VecSizeInBits = getParamType(TypeArgIdx).getSizeInBits();
+      }
+
+      ImmChecks.emplace_back(
+          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits));
+    }
   }
 
   /// Get the Record that this intrinsic is based off.
@@ -419,14 +438,13 @@ class Intrinsic {
   /// Get the architectural guard string (#ifdef).
   std::string getArchGuard() const { return ArchGuard; }
   std::string getTargetGuard() const { return TargetGuard; }
-  ArrayRef<std::tuple<int, int, int>> getImmChecks() const { return ImmChecks; }
+  ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
   /// Get the non-mangled name.
   std::string getName() const { return Name; }
 
   /// Return true if the intrinsic takes an immediate operand.
   bool hasImmediate() const {
     return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
-    // return !ImmChecks.empty();
   }
 
   // Return if the supplied argument is an immediate
@@ -558,7 +576,6 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
-  void genNeonImmCheckTypes(raw_ostream &OS);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
 
@@ -1957,16 +1974,6 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
-  std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
-
-  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
-  for (const auto *R : ImmCheckList) {
-
-    ImmChecks.push_back(
-        std::make_tuple(R->getValueAsInt("Arg"),
-                        R->getValueAsDef("Kind")->getValueAsInt("Value"),
-                        R->getValueAsInt("EltSizeArg")));
-  }
 
   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
@@ -2008,9 +2015,8 @@ void NeonEmitter::createIntrinsic(Record *R,
   auto &Entry = IntrinsicMap[Name];
 
   for (auto &I : NewTypeSpecs) {
-    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body,
-                       *this, ArchGuard, TargetGuard, IsUnavailable,
-                       BigEndianSafe);
+    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
+                       ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
     Out.push_back(&Entry.back());
   }
 
@@ -2159,21 +2165,9 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
-void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
-  OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
-
-  for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
-    OS << "  " << RV->getNameInitAsString() << " = "
-       << RV->getValueAsInt("Value") << ",\n";
-  }
-
-  OS << "#endif\n\n";
-}
-
 void NeonEmitter::genIntrinsicRangeCheckCode(
     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
-  int EltType;
   // Ensure these are only emitted once.
   std::set<std::string> Emitted;
 
@@ -2187,15 +2181,11 @@ void NeonEmitter::genIntrinsicRangeCheckCode(
       continue;
 
     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
-
     for (const auto &Check : Def->getImmChecks()) {
-      EltType = std::get<2>(Check); // elt type argument
-      if (EltType >= 0)
-        EltType = Def->getParamType(EltType).getNeonEnum();
-
-      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check)
-         << ", " << std::get<1>(Check) << ", " << EltType << ")); \n";
-      OS << "  break;\n";
+      OS << " ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
+         << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
+         << Check.getVecSizeInBits() << "));\n"
+         << " break;\n";
     }
     Emitted.insert(Def->getMangledName());
   }
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 897fd24bd75cc0..3a667e10bd8d72 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/TableGen/AArch64ImmCheck.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include <array>
@@ -49,23 +50,6 @@ enum class ACLEKind { SVE, SME };
 using TypeSpec = std::string;
 
 namespace {
-
-class ImmCheck {
-  unsigned Arg;
-  unsigned Kind;
-  unsigned ElementSizeInBits;
-
-public:
-  ImmCheck(unsigned Arg, unsigned Kind, unsigned ElementSizeInBits = 0)
-      : Arg(Arg), Kind(Kind), ElementSizeInBits(ElementSizeInBits) {}
-  ImmCheck(const ImmCheck &Other) = default;
-  ~ImmCheck() = default;
-
-  unsigned getArg() const { return Arg; }
-  unsigned getKind() const { return Kind; }
-  unsigned getElementSizeInBits() const { return ElementSizeInBits; }
-};
-
 class SVEType {
   bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat;
   bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp,
@@ -433,7 +417,6 @@ const std::array<SVEEmitter::ReinterpretTypeInfo, 12> SVEEmitter::Reinterprets =
 
 } // end anonymous namespace
 
-
 //===----------------------------------------------------------------------===//
 // Type implementation
 //===----------------------------------------------------------------------===//
@@ -1214,7 +1197,7 @@ void SVEEmitter::createIntrinsic(
     SmallVector<ImmCheck, 2> ImmChecks;
     for (auto *R : ImmCheckList) {
       int64_t Arg = R->getValueAsInt("Arg");
-      int64_t EltSizeArg = R->getValueAsInt("EltSizeArg");
+      int64_t EltSizeArg = R->getValueAsInt("TypeContextArg");
       int64_t Kind = R->getValueAsDef("Kind")->getValueAsInt("Value");
       assert(Arg >= 0 && Kind >= 0 && "Arg and Kind must be nonnegative");
 
diff --git a/llvm/include/llvm/TableGen/AArch64ImmCheck.h b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
new file mode 100644
index 00000000000000..ff8c4a1865ea34
--- /dev/null
+++ b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
@@ -0,0 +1,37 @@
+//===----- AArch64ImmCheck.h -- ARM immediate range check -----*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the ImmCheck class which supports the range-checking of
+/// immediate values supplied to AArch64 SVE/SME and NEON intrinsics.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64_IMMCHECK_H
+#define AARCH64_IMMCHECK_H
+
+class ImmCheck {
+  unsigned Arg;
+  unsigned Kind;
+  unsigned ElementSizeInBits;
+  unsigned VecSizeInBits;
+
+public:
+  ImmCheck(unsigned Arg, unsigned Kind, unsigned ElementSizeInBits = 0,
+           unsigned VecSizeInBits = 128)
+      : Arg(Arg), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
+        VecSizeInBits(VecSizeInBits) {}
+  ImmCheck(const ImmCheck &Other) = default;
+  ~ImmCheck() = default;
+
+  unsigned getArg() const { return Arg; }
+  unsigned getKind() const { return Kind; }
+  unsigned getElementSizeInBits() const { return ElementSizeInBits; }
+  unsigned getVecSizeInBits() const { return VecSizeInBits; }
+};
+
+#endif

>From 28ca4eb4bbe04448ef67d33b7dd13d4dec6a3e69 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Fri, 9 Aug 2024 10:40:30 +0000
Subject: [PATCH 06/13] Add NEON intrinsic immediate range tests

- Create ../clang/test/Sema/aarch64-neon-immediate-ranges/
	- including immediate range checking tests for all intrinsics
	  that take an immediate argument and aren't tested elsewhere.
	- Where tests have been excluded to avoid duplication, a note
	  has been made of where the existing tests can be found.
	- A note has been made about the exclusion of widening left-shifts
	  from these tests.
	- These tests were created with the help of a tool written by the
	  author of this commit: https://github.com/SpencerAbson/imm-test-gen
		- Although they are generated on a per-file basis, care has been
		  taken to ensure that the tests are correct and no test
		  duplication is introduced.
---
 .../aarch64-neon-immediate-ranges/bfloat16.c  |  204 ++++
 .../conversions.c                             |  144 +++
 .../copy-vector-lane.c                        |  498 ++++++++
 .../aarch64-neon-immediate-ranges/dotprod.c   |   29 +
 .../extract-elt-from-vector.c                 |  301 +++++
 .../extract-vector-from-vectors.c             |  134 ++
 .../fp16-scalar.c                             |   24 +
 .../aarch64-neon-immediate-ranges/fp16-v84.c  |   89 ++
 .../fp16-vector.c                             |  181 +++
 .../fused-multiply-accumulate.c               |  115 ++
 .../matrix-multiplication.c                   |   50 +
 .../multiply-extended.c                       |   69 ++
 .../saturating-multiply-accumulate.c          |   91 ++
 .../saturating-multiply-by-scalar-and-widen.c |  136 +++
 .../set-lanes-to-value.c                      |  277 +++++
 .../set-vector-lane.c                         |  162 +++
 .../sqrdmlah-ranges.c                         |   94 ++
 .../vector-load.c                             |  525 ++++++++
 .../vector-multiply-accumulate-by-scalar.c    |  161 +++
 .../vector-multiply-by-scalar-and-widen.c     |   78 ++
 .../vector-multiply-by-scalar.c               |  158 +++
 .../vector-multiply-subtract-by-scalar.c      |  161 +++
 .../vector-shift-left.c                       |  542 +++++++++
 .../vector-shift-right.c                      | 1083 +++++++++++++++++
 .../vector-store.c                            |  448 +++++++
 25 files changed, 5754 insertions(+)
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c

diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c b/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
new file mode 100644
index 00000000000000..bde8efc260f172
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
@@ -0,0 +1,204 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +bf16 -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_bf16.h>
+// REQUIRES: aarch64-registered-target
+
+// clang/test/Sema/aarch64-neon-bf16-ranges.c includes tests for:
+// vcopy_lane_bf16, vcopyq_lane_bf16, vcopy_laneq_bf16, vcopyq_laneq_bf16
+
+void test_set_all_lanes_to_the_same_value_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vdup_lane_bf16(arg_b16x4, 0);
+	vdup_lane_bf16(arg_b16x4, 3);
+	vdup_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_bf16(arg_b16x4, 0);
+	vdupq_lane_bf16(arg_b16x4, 3);
+	vdupq_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_bf16(arg_b16x8, 0);
+	vdup_laneq_bf16(arg_b16x8, 7);
+	vdup_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_bf16(arg_b16x8, 0);
+	vdupq_laneq_bf16(arg_b16x8, 7);
+	vdupq_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_lane_bf16(arg_b16x4, 0);
+	vduph_lane_bf16(arg_b16x4, 3);
+	vduph_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_bf16(arg_b16x8, 0);
+	vduph_laneq_bf16(arg_b16x8, 7);
+	vduph_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_split_vectors_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vget_lane_bf16(arg_b16x4, 0);
+	vget_lane_bf16(arg_b16x4, 3);
+	vget_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_bf16(arg_b16x8, 0);
+	vgetq_lane_bf16(arg_b16x8, 7);
+	vgetq_lane_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, bfloat16_t arg_b16) {
+	vset_lane_bf16(arg_b16, arg_b16x4, 0);
+	vset_lane_bf16(arg_b16, arg_b16x4, 3);
+	vset_lane_bf16(arg_b16, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_bf16(arg_b16, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 0);
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 7);
+	vsetq_lane_bf16(arg_b16, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_load_bf16(bfloat16x4_t arg_b16x4, bfloat16x4x4_t arg_b16x4x4, bfloat16x8x4_t arg_b16x8x4,
+					bfloat16x4x3_t arg_b16x4x3, bfloat16x8_t arg_b16x8, bfloat16_t* arg_b16_ptr,
+					bfloat16x8x3_t arg_b16x8x3, bfloat16x4x2_t arg_b16x4x2, bfloat16x8x2_t arg_b16x8x2) {
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 0);
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 3);
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 0);
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 7);
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 0);
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 3);
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 0);
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 7);
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 0);
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 3);
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 0);
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 7);
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 0);
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 3);
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 0);
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 7);
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_bf16(bfloat16x4_t arg_b16x4, bfloat16x4x4_t arg_b16x4x4, bfloat16x8x4_t arg_b16x8x4,
+					 bfloat16x4x3_t arg_b16x4x3, bfloat16x8_t arg_b16x8, bfloat16_t* arg_b16_ptr,
+					 bfloat16x8x3_t arg_b16x8x3, bfloat16x4x2_t arg_b16x4x2, bfloat16x8x2_t arg_b16x8x2) { // Range checks for the lane immediate of the bf16 vst1..vst4 lane stores: each group passes both accepted bounds, then one value below and one above.
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 0); // vst1_lane_bf16: accepted lane bounds are 0 and 3
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 3);
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 0); // vst1q_lane_bf16: accepted lane bounds are 0 and 7
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 7);
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 0); // vst2_lane_bf16: accepted lane bounds are 0 and 3
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 3);
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 0); // vst2q_lane_bf16: accepted lane bounds are 0 and 7
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 7);
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 0); // vst3_lane_bf16: accepted lane bounds are 0 and 3
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 3);
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 0); // vst3q_lane_bf16: accepted lane bounds are 0 and 7
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 7);
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 0); // vst4_lane_bf16: accepted lane bounds are 0 and 3
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 3);
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 0); // vst4q_lane_bf16: accepted lane bounds are 0 and 7
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 7);
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_dot_product_f32(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Range checks for the lane immediate (last argument) of the vbfdot lane/laneq intrinsics.
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 0); // vbfdot_lane_f32: accepted lane bounds are 0 and 1
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 1);
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0); // vbfdotq_laneq_f32: accepted lane bounds are 0 and 3
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 3);
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 0); // vbfdot_laneq_f32: accepted lane bounds are 0 and 3
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 3);
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0); // vbfdotq_lane_f32: accepted lane bounds are 0 and 1
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 1);
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_f32(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, float32x4_t arg_f32x4) { // Range checks for the lane immediate of vbfmlalb/vbfmlalt lane forms. The bare '+' count in the directives below matches one or more diagnostics per line (presumably because a single call reports more than once - confirm).
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0); // vbfmlalbq_lane_f32: accepted lane bounds are 0 and 3
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 3);
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0); // vbfmlalbq_laneq_f32: accepted lane bounds are 0 and 7
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 7);
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0); // vbfmlaltq_lane_f32: accepted lane bounds are 0 and 3
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 3);
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0); // vbfmlaltq_laneq_f32: accepted lane bounds are 0 and 7
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 7);
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c b/clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
new file mode 100644
index 00000000000000..30ae7f73924221
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
@@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_conversions_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Range checks for the immediate of the vcvt*_n conversions from f32 to s32/u32 (vector and scalar forms).
+	vcvt_n_s32_f32(arg_f32x2, 1); // vcvt_n_s32_f32: accepted immediate bounds are 1 and 32
+	vcvt_n_s32_f32(arg_f32x2, 32);
+	vcvt_n_s32_f32(arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s32_f32(arg_f32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s32_f32(arg_f32x4, 1); // vcvtq_n_s32_f32: accepted immediate bounds are 1 and 32
+	vcvtq_n_s32_f32(arg_f32x4, 32);
+	vcvtq_n_s32_f32(arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s32_f32(arg_f32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u32_f32(arg_f32x2, 1); // vcvt_n_u32_f32: accepted immediate bounds are 1 and 32
+	vcvt_n_u32_f32(arg_f32x2, 32);
+	vcvt_n_u32_f32(arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u32_f32(arg_f32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u32_f32(arg_f32x4, 1); // vcvtq_n_u32_f32: accepted immediate bounds are 1 and 32
+	vcvtq_n_u32_f32(arg_f32x4, 32);
+	vcvtq_n_u32_f32(arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u32_f32(arg_f32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_s32_f32(arg_f32, 1); // vcvts_n_s32_f32 (scalar): accepted immediate bounds are 1 and 32
+	vcvts_n_s32_f32(arg_f32, 32);
+	vcvts_n_s32_f32(arg_f32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_s32_f32(arg_f32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_u32_f32(arg_f32, 1); // vcvts_n_u32_f32 (scalar): accepted immediate bounds are 1 and 32
+	vcvts_n_u32_f32(arg_f32, 32);
+	vcvts_n_u32_f32(arg_f32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_u32_f32(arg_f32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_f64(float64x1_t arg_f64x1, float64x2_t arg_f64x2, float64_t arg_f64) { // Range checks for the immediate of the vcvt*_n conversions from f64 to s64/u64 (vector and scalar forms).
+	vcvt_n_s64_f64(arg_f64x1, 1); // vcvt_n_s64_f64: accepted immediate bounds are 1 and 64
+	vcvt_n_s64_f64(arg_f64x1, 64);
+	vcvt_n_s64_f64(arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s64_f64(arg_f64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s64_f64(arg_f64x2, 1); // vcvtq_n_s64_f64: accepted immediate bounds are 1 and 64
+	vcvtq_n_s64_f64(arg_f64x2, 64);
+	vcvtq_n_s64_f64(arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s64_f64(arg_f64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u64_f64(arg_f64x1, 1); // vcvt_n_u64_f64: accepted immediate bounds are 1 and 64
+	vcvt_n_u64_f64(arg_f64x1, 64);
+	vcvt_n_u64_f64(arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u64_f64(arg_f64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u64_f64(arg_f64x2, 1); // vcvtq_n_u64_f64: accepted immediate bounds are 1 and 64
+	vcvtq_n_u64_f64(arg_f64x2, 64);
+	vcvtq_n_u64_f64(arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u64_f64(arg_f64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_s64_f64(arg_f64, 1); // vcvtd_n_s64_f64 (scalar): accepted immediate bounds are 1 and 64
+	vcvtd_n_s64_f64(arg_f64, 64);
+	vcvtd_n_s64_f64(arg_f64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_s64_f64(arg_f64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_u64_f64(arg_f64, 1); // vcvtd_n_u64_f64 (scalar): accepted immediate bounds are 1 and 64
+	vcvtd_n_u64_f64(arg_f64, 64);
+	vcvtd_n_u64_f64(arg_f64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_u64_f64(arg_f64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s32(int32_t arg_i32, int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Range checks for the immediate of the vcvt*_n conversions from s32 to f32 (vector and scalar forms).
+	vcvt_n_f32_s32(arg_i32x2, 1); // vcvt_n_f32_s32: accepted immediate bounds are 1 and 32
+	vcvt_n_f32_s32(arg_i32x2, 32);
+	vcvt_n_f32_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f32_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f32_s32(arg_i32x4, 1); // vcvtq_n_f32_s32: accepted immediate bounds are 1 and 32
+	vcvtq_n_f32_s32(arg_i32x4, 32);
+	vcvtq_n_f32_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f32_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_f32_s32(arg_i32, 1); // vcvts_n_f32_s32 (scalar): accepted immediate bounds are 1 and 32
+	vcvts_n_f32_s32(arg_i32, 32);
+	vcvts_n_f32_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_f32_s32(arg_i32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u32(uint32x4_t arg_u32x4, uint32x2_t arg_u32x2, uint32_t arg_u32) { // Range checks for the immediate of the vcvt*_n conversions from u32 to f32 (vector and scalar forms).
+	vcvt_n_f32_u32(arg_u32x2, 1); // vcvt_n_f32_u32: accepted immediate bounds are 1 and 32
+	vcvt_n_f32_u32(arg_u32x2, 32);
+	vcvt_n_f32_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f32_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f32_u32(arg_u32x4, 1); // vcvtq_n_f32_u32: accepted immediate bounds are 1 and 32
+	vcvtq_n_f32_u32(arg_u32x4, 32);
+	vcvtq_n_f32_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f32_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_f32_u32(arg_u32, 1); // vcvts_n_f32_u32 (scalar): accepted immediate bounds are 1 and 32
+	vcvts_n_f32_u32(arg_u32, 32);
+	vcvts_n_f32_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_f32_u32(arg_u32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1, int64_t arg_i64) { // Range checks for the immediate of the vcvt*_n conversions from s64 to f64 (vector and scalar forms).
+	vcvt_n_f64_s64(arg_i64x1, 1); // vcvt_n_f64_s64: accepted immediate bounds are 1 and 64
+	vcvt_n_f64_s64(arg_i64x1, 64);
+	vcvt_n_f64_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f64_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f64_s64(arg_i64x2, 1); // vcvtq_n_f64_s64: accepted immediate bounds are 1 and 64
+	vcvtq_n_f64_s64(arg_i64x2, 64);
+	vcvtq_n_f64_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f64_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_f64_s64(arg_i64, 1); // vcvtd_n_f64_s64 (scalar): accepted immediate bounds are 1 and 64
+	vcvtd_n_f64_s64(arg_i64, 64);
+	vcvtd_n_f64_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_f64_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Range checks for the immediate of the vcvt*_n conversions from u64 to f64 (vector and scalar forms).
+	vcvt_n_f64_u64(arg_u64x1, 1); // vcvt_n_f64_u64: accepted immediate bounds are 1 and 64
+	vcvt_n_f64_u64(arg_u64x1, 64);
+	vcvt_n_f64_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f64_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f64_u64(arg_u64x2, 1); // vcvtq_n_f64_u64: accepted immediate bounds are 1 and 64
+	vcvtq_n_f64_u64(arg_u64x2, 64);
+	vcvtq_n_f64_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f64_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_f64_u64(arg_u64, 1); // vcvtd_n_f64_u64 (scalar): accepted immediate bounds are 1 and 64
+	vcvtd_n_f64_u64(arg_u64, 64);
+	vcvtd_n_f64_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_f64_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c b/clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
new file mode 100644
index 00000000000000..aafd36d1ccfe62
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
@@ -0,0 +1,498 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_copy_vector_lane_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8) { // Range checks for both immediates of the s8 vcopy lane intrinsics: the 2nd argument is varied first (4th held at 0), then the 4th (2nd held at 0).
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 0); // vcopy_lane_s8, 2nd argument: accepted bounds are 0 and 7
+
+	vcopy_lane_s8(arg_i8x8, 7, arg_i8x8, 0);
+	vcopy_lane_s8(arg_i8x8, -1, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s8(arg_i8x8, 8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 7); // vcopy_lane_s8, 4th argument: upper bound 7 (0 is exercised above)
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 0); // vcopyq_lane_s8, 2nd argument: accepted bounds are 0 and 15
+	vcopyq_lane_s8(arg_i8x16, 15, arg_i8x8, 0);
+	vcopyq_lane_s8(arg_i8x16, -1, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s8(arg_i8x16, 16, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 7); // vcopyq_lane_s8, 4th argument: upper bound 7 (0 is exercised above)
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 0); // vcopy_laneq_s8, 2nd argument: accepted bounds are 0 and 7
+	vcopy_laneq_s8(arg_i8x8, 7, arg_i8x16, 0);
+	vcopy_laneq_s8(arg_i8x8, -1, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s8(arg_i8x8, 8, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 15); // vcopy_laneq_s8, 4th argument: upper bound 15 (0 is exercised above)
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 0); // vcopyq_laneq_s8, 2nd argument: accepted bounds are 0 and 15
+	vcopyq_laneq_s8(arg_i8x16, 15, arg_i8x16, 0);
+	vcopyq_laneq_s8(arg_i8x16, -1, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s8(arg_i8x16, 16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 15); // vcopyq_laneq_s8, 4th argument: upper bound 15 (0 is exercised above)
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Range checks for both immediates of the s16 vcopy lane intrinsics: the 2nd argument is varied first (4th held at 0), then the 4th (2nd held at 0).
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 0); // vcopy_lane_s16, 2nd argument: accepted bounds are 0 and 3
+	vcopy_lane_s16(arg_i16x4, 3, arg_i16x4, 0);
+	vcopy_lane_s16(arg_i16x4, -1, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s16(arg_i16x4, 4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 3); // vcopy_lane_s16, 4th argument: upper bound 3 (0 is exercised above)
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 0); // vcopyq_lane_s16, 2nd argument: accepted bounds are 0 and 7
+	vcopyq_lane_s16(arg_i16x8, 7, arg_i16x4, 0);
+	vcopyq_lane_s16(arg_i16x8, -1, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s16(arg_i16x8, 8, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 3); // vcopyq_lane_s16, 4th argument: upper bound 3 (0 is exercised above)
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 0); // vcopy_laneq_s16, 2nd argument: accepted bounds are 0 and 3
+	vcopy_laneq_s16(arg_i16x4, 3, arg_i16x8, 0);
+	vcopy_laneq_s16(arg_i16x4, -1, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s16(arg_i16x4, 4, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 7); // vcopy_laneq_s16, 4th argument: upper bound 7 (0 is exercised above)
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 0); // vcopyq_laneq_s16, 2nd argument: accepted bounds are 0 and 7
+	vcopyq_laneq_s16(arg_i16x8, 7, arg_i16x8, 0);
+	vcopyq_laneq_s16(arg_i16x8, -1, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s16(arg_i16x8, 8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 7); // vcopyq_laneq_s16, 4th argument: upper bound 7 (0 is exercised above)
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Range checks for both immediates of the s32 vcopy lane intrinsics: the 2nd argument is varied first (4th held at 0), then the 4th (2nd held at 0).
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 0); // vcopy_lane_s32, 2nd argument: accepted bounds are 0 and 1
+	vcopy_lane_s32(arg_i32x2, 1, arg_i32x2, 0);
+	vcopy_lane_s32(arg_i32x2, -1, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s32(arg_i32x2, 2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 1); // vcopy_lane_s32, 4th argument: upper bound 1 (0 is exercised above)
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 0); // vcopyq_lane_s32, 2nd argument: accepted bounds are 0 and 3
+	vcopyq_lane_s32(arg_i32x4, 3, arg_i32x2, 0);
+	vcopyq_lane_s32(arg_i32x4, -1, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s32(arg_i32x4, 4, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 1); // vcopyq_lane_s32, 4th argument: upper bound 1 (0 is exercised above)
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 0); // vcopy_laneq_s32, 2nd argument: accepted bounds are 0 and 1
+	vcopy_laneq_s32(arg_i32x2, 1, arg_i32x4, 0);
+	vcopy_laneq_s32(arg_i32x2, -1, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s32(arg_i32x2, 2, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 3); // vcopy_laneq_s32, 4th argument: upper bound 3 (0 is exercised above)
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 0); // vcopyq_laneq_s32, 2nd argument: accepted bounds are 0 and 3
+	vcopyq_laneq_s32(arg_i32x4, 3, arg_i32x4, 0);
+	vcopyq_laneq_s32(arg_i32x4, -1, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s32(arg_i32x4, 4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 3); // vcopyq_laneq_s32, 4th argument: upper bound 3 (0 is exercised above)
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s64(int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Range checks for both immediates of the s64 vcopy lane intrinsics; for the int64x1_t operands the only accepted value is 0.
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 0); // vcopy_lane_s64, 2nd argument: only 0 is accepted
+	vcopy_lane_s64(arg_i64x1, -1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 0); // vcopy_lane_s64, 4th argument: only 0 is accepted (same call as the first line of this group)
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, 0); // vcopyq_lane_s64, 2nd argument: accepted bounds are 0 and 1; the 4th-argument group below has only rejected values, its accepted 0 is this call
+	vcopyq_lane_s64(arg_i64x2, 1, arg_i64x1, 0);
+	vcopyq_lane_s64(arg_i64x2, -1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s64(arg_i64x2, 2, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 0); // vcopy_laneq_s64, 2nd argument: only 0 is accepted
+	vcopy_laneq_s64(arg_i64x1, -1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s64(arg_i64x1, 1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 1); // vcopy_laneq_s64, 4th argument: upper bound 1 (0 is exercised above)
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 0); // vcopyq_laneq_s64, 2nd argument: accepted bounds are 0 and 1
+	vcopyq_laneq_s64(arg_i64x2, 1, arg_i64x2, 0);
+	vcopyq_laneq_s64(arg_i64x2, -1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s64(arg_i64x2, 2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 1); // vcopyq_laneq_s64, 4th argument: upper bound 1 (0 is exercised above)
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // Range checks for both immediates of the u8 vcopy lane intrinsics: the 2nd argument is varied first (4th held at 0), then the 4th (2nd held at 0).
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 0); // vcopy_lane_u8, 2nd argument: accepted bounds are 0 and 7
+	vcopy_lane_u8(arg_u8x8, 7, arg_u8x8, 0);
+	vcopy_lane_u8(arg_u8x8, -1, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u8(arg_u8x8, 8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 7); // vcopy_lane_u8, 4th argument: upper bound 7 (0 is exercised above)
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 0); // vcopyq_lane_u8, 2nd argument: accepted bounds are 0 and 15
+	vcopyq_lane_u8(arg_u8x16, 15, arg_u8x8, 0);
+	vcopyq_lane_u8(arg_u8x16, -1, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u8(arg_u8x16, 16, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 7); // vcopyq_lane_u8, 4th argument: upper bound 7 (0 is exercised above)
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 0); // vcopy_laneq_u8, 2nd argument: accepted bounds are 0 and 7
+	vcopy_laneq_u8(arg_u8x8, 7, arg_u8x16, 0);
+	vcopy_laneq_u8(arg_u8x8, -1, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u8(arg_u8x8, 8, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 15); // vcopy_laneq_u8, 4th argument: upper bound 15 (0 is exercised above)
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 0); // vcopyq_laneq_u8, 2nd argument: accepted bounds are 0 and 15
+	vcopyq_laneq_u8(arg_u8x16, 15, arg_u8x16, 0);
+	vcopyq_laneq_u8(arg_u8x16, -1, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u8(arg_u8x16, 16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 15); // vcopyq_laneq_u8, 4th argument: upper bound 15 (0 is exercised above)
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // Range checks for both immediates of the u16 vcopy lane intrinsics: the 2nd argument is varied first (4th held at 0), then the 4th (2nd held at 0).
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 0); // vcopy_lane_u16, 2nd argument: accepted bounds are 0 and 3
+	vcopy_lane_u16(arg_u16x4, 3, arg_u16x4, 0);
+	vcopy_lane_u16(arg_u16x4, -1, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u16(arg_u16x4, 4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 3); // vcopy_lane_u16, 4th argument: upper bound 3 (0 is exercised above)
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 0); // vcopyq_lane_u16, 2nd argument: accepted bounds are 0 and 7
+	vcopyq_lane_u16(arg_u16x8, 7, arg_u16x4, 0);
+	vcopyq_lane_u16(arg_u16x8, -1, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u16(arg_u16x8, 8, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 3); // vcopyq_lane_u16, 4th argument: upper bound 3 (0 is exercised above)
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 0); // vcopy_laneq_u16, 2nd argument: accepted bounds are 0 and 3
+	vcopy_laneq_u16(arg_u16x4, 3, arg_u16x8, 0);
+	vcopy_laneq_u16(arg_u16x4, -1, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u16(arg_u16x4, 4, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 7); // vcopy_laneq_u16, 4th argument: upper bound 7 (0 is exercised above)
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 0); // vcopyq_laneq_u16, 2nd argument: accepted bounds are 0 and 7
+	vcopyq_laneq_u16(arg_u16x8, 7, arg_u16x8, 0);
+	vcopyq_laneq_u16(arg_u16x8, -1, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u16(arg_u16x8, 8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 7); // vcopyq_laneq_u16, 4th argument: upper bound 7 (0 is exercised above)
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Range checks for both immediates of the u32 vcopy lane intrinsics: the 2nd argument is varied first (4th held at 0), then the 4th (2nd held at 0).
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 0); // vcopy_lane_u32, 2nd argument: accepted bounds are 0 and 1
+	vcopy_lane_u32(arg_u32x2, 1, arg_u32x2, 0);
+	vcopy_lane_u32(arg_u32x2, -1, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u32(arg_u32x2, 2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 1); // vcopy_lane_u32, 4th argument: upper bound 1 (0 is exercised above)
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 0); // vcopyq_lane_u32, 2nd argument: accepted bounds are 0 and 3
+	vcopyq_lane_u32(arg_u32x4, 3, arg_u32x2, 0);
+	vcopyq_lane_u32(arg_u32x4, -1, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u32(arg_u32x4, 4, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 1); // vcopyq_lane_u32, 4th argument: upper bound 1 (0 is exercised above)
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 0); // vcopy_laneq_u32, 2nd argument: accepted bounds are 0 and 1
+	vcopy_laneq_u32(arg_u32x2, 1, arg_u32x4, 0);
+	vcopy_laneq_u32(arg_u32x2, -1, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u32(arg_u32x2, 2, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 3); // vcopy_laneq_u32, 4th argument: upper bound 3 (0 is exercised above)
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 0); // vcopyq_laneq_u32, 2nd argument: accepted bounds are 0 and 3
+	vcopyq_laneq_u32(arg_u32x4, 3, arg_u32x4, 0);
+	vcopyq_laneq_u32(arg_u32x4, -1, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u32(arg_u32x4, 4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 3); // vcopyq_laneq_u32, 4th argument: upper bound 3 (0 is exercised above)
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u64(uint64x2_t arg_u64x2, uint64x1_t arg_u64x1) { // Boundary checks on the 2nd and 4th (immediate) arguments of each u64 vcopy variant called here.
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, 0); // args 2 and 4: 0 OK; -1 and 1 error (this group and the next)
+	vcopy_lane_u64(arg_u64x1, -1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u64(arg_u64x1, 1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, 0); // arg 2: 0 and 1 OK; -1 and 2 error. Next group, arg 4: -1 and 1 error
+	vcopyq_lane_u64(arg_u64x2, 1, arg_u64x1, 0);
+	vcopyq_lane_u64(arg_u64x2, -1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u64(arg_u64x2, 2, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 0); // arg 2: 0 OK; -1 and 1 error
+	vcopy_laneq_u64(arg_u64x1, -1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u64(arg_u64x1, 1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 0); // arg 2: 0 and 1 OK; -1 and 2 error
+	vcopyq_laneq_u64(arg_u64x2, 1, arg_u64x2, 0);
+	vcopyq_laneq_u64(arg_u64x2, -1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u64(arg_u64x2, 2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p64(poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) { // Boundary checks on the 2nd and 4th (immediate) arguments of each p64 vcopy variant called here.
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, 0); // args 2 and 4: 0 OK; -1 and 1 error (this group and the next)
+	vcopy_lane_p64(arg_p64x1, -1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p64(arg_p64x1, 1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, 0); // arg 2: 0 and 1 OK; -1 and 2 error. Next group, arg 4: -1 and 1 error
+	vcopyq_lane_p64(arg_p64x2, 1, arg_p64x1, 0);
+	vcopyq_lane_p64(arg_p64x2, -1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p64(arg_p64x2, 2, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 0); // arg 2: 0 OK; -1 and 1 error
+	vcopy_laneq_p64(arg_p64x1, -1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p64(arg_p64x1, 1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 0); // arg 2: 0 and 1 OK; -1 and 2 error
+	vcopyq_laneq_p64(arg_p64x2, 1, arg_p64x2, 0);
+	vcopyq_laneq_p64(arg_p64x2, -1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p64(arg_p64x2, 2, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Boundary checks on the 2nd and 4th (immediate) arguments of each f32 vcopy variant called here.
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 0); // arg 2: 0 and 1 OK; -1 and 2 error
+	vcopy_lane_f32(arg_f32x2, 1, arg_f32x2, 0);
+	vcopy_lane_f32(arg_f32x2, -1, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f32(arg_f32x2, 2, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 0); // arg 2: 0 and 3 OK; -1 and 4 error
+	vcopyq_lane_f32(arg_f32x4, 3, arg_f32x2, 0);
+	vcopyq_lane_f32(arg_f32x4, -1, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f32(arg_f32x4, 4, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 0); // arg 2: 0 and 1 OK; -1 and 2 error
+	vcopy_laneq_f32(arg_f32x2, 1, arg_f32x4, 0);
+	vcopy_laneq_f32(arg_f32x2, -1, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f32(arg_f32x2, 2, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 3); // arg 4: 3 OK; -1 and 4 error
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 0); // arg 2: 0 and 3 OK; -1 and 4 error
+	vcopyq_laneq_f32(arg_f32x4, 3, arg_f32x4, 0);
+	vcopyq_laneq_f32(arg_f32x4, -1, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f32(arg_f32x4, 4, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 3); // arg 4: 3 OK; -1 and 4 error
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Boundary checks on the 2nd and 4th (immediate) arguments of each f64 vcopy variant called here.
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, 0); // args 2 and 4: 0 OK; -1 and 1 error (this group and the next)
+	vcopy_lane_f64(arg_f64x1, -1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f64(arg_f64x1, 1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, 0); // arg 2: 0 and 1 OK; -1 and 2 error. Next group, arg 4: -1 and 1 error
+	vcopyq_lane_f64(arg_f64x2, 1, arg_f64x1, 0);
+	vcopyq_lane_f64(arg_f64x2, -1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f64(arg_f64x2, 2, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 0); // arg 2: 0 OK; -1 and 1 error
+	vcopy_laneq_f64(arg_f64x1, -1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f64(arg_f64x1, 1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 0); // arg 2: 0 and 1 OK; -1 and 2 error
+	vcopyq_laneq_f64(arg_f64x2, 1, arg_f64x2, 0);
+	vcopyq_laneq_f64(arg_f64x2, -1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f64(arg_f64x2, 2, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 1); // arg 4: 1 OK; -1 and 2 error
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) { // Boundary checks on the 2nd and 4th (immediate) arguments of each p8 vcopy variant called here.
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 0); // arg 2: 0 and 7 OK; -1 and 8 error
+	vcopy_lane_p8(arg_p8x8, 7, arg_p8x8, 0);
+	vcopy_lane_p8(arg_p8x8, -1, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p8(arg_p8x8, 8, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 7); // arg 4: 7 OK; -1 and 8 error
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 0); // arg 2: 0 and 15 OK; -1 and 16 error
+	vcopyq_lane_p8(arg_p8x16, 15, arg_p8x8, 0);
+	vcopyq_lane_p8(arg_p8x16, -1, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p8(arg_p8x16, 16, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 7); // arg 4: 7 OK; -1 and 8 error
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 0); // arg 2: 0 and 7 OK; -1 and 8 error
+	vcopy_laneq_p8(arg_p8x8, 7, arg_p8x16, 0);
+	vcopy_laneq_p8(arg_p8x8, -1, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p8(arg_p8x8, 8, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 15); // arg 4: 15 OK; -1 and 16 error
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 0); // arg 2: 0 and 15 OK; -1 and 16 error
+	vcopyq_laneq_p8(arg_p8x16, 15, arg_p8x16, 0);
+	vcopyq_laneq_p8(arg_p8x16, -1, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p8(arg_p8x16, 16, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 15); // arg 4: 15 OK; -1 and 16 error
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p16(poly16x8_t arg_p16x8, poly16x4_t arg_p16x4) { // Boundary checks on the 2nd and 4th (immediate) arguments of each p16 vcopy variant called here.
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 0); // arg 2: 0 and 3 OK; -1 and 4 error
+	vcopy_lane_p16(arg_p16x4, 3, arg_p16x4, 0);
+	vcopy_lane_p16(arg_p16x4, -1, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p16(arg_p16x4, 4, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 3); // arg 4: 3 OK; -1 and 4 error
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 0); // arg 2: 0 and 7 OK; -1 and 8 error
+	vcopyq_lane_p16(arg_p16x8, 7, arg_p16x4, 0);
+	vcopyq_lane_p16(arg_p16x8, -1, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p16(arg_p16x8, 8, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 3); // arg 4: 3 OK; -1 and 4 error
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 0); // arg 2: 0 and 3 OK; -1 and 4 error
+	vcopy_laneq_p16(arg_p16x4, 3, arg_p16x8, 0);
+	vcopy_laneq_p16(arg_p16x4, -1, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p16(arg_p16x4, 4, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 7); // arg 4: 7 OK; -1 and 8 error
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 0); // arg 2: 0 and 7 OK; -1 and 8 error
+	vcopyq_laneq_p16(arg_p16x8, 7, arg_p16x8, 0);
+	vcopyq_laneq_p16(arg_p16x8, -1, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p16(arg_p16x8, 8, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 7); // arg 4: 7 OK; -1 and 8 error
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c b/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
new file mode 100644
index 00000000000000..553ee096e490c7
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.2a -target-feature +dotprod -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// The s32 variants are tested in clang/test/CodeGen/arm-neon-range-checks.c
+void test_dot_product_u32(uint8x8_t arg_u8x8, uint32x2_t arg_u32x2, uint8x16_t arg_u8x16, uint32x4_t arg_u32x4) { // Boundary checks on the 4th (immediate) argument of each u32 vdot lane variant called here.
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 0); // arg 4: 0 and 1 OK; -1 and 2 error
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 1);
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 0); // arg 4: 0 and 3 OK; -1 and 4 error
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 3);
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 0); // arg 4: 0 and 3 OK; -1 and 4 error
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 3);
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 0); // arg 4: 0 and 1 OK; -1 and 2 error
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 1);
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
new file mode 100644
index 00000000000000..5738f5ad27f3e8
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
@@ -0,0 +1,301 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_extract_one_element_from_vector_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8) { // Boundary checks on the immediate (2nd argument) of the s8 vdupb and vget lane calls.
+	vdupb_lane_s8(arg_i8x8, 0); // 0 and 7 OK; -1 and 8 error
+	vdupb_lane_s8(arg_i8x8, 7);
+	vdupb_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_s8(arg_i8x16, 0); // 0 and 15 OK; -1 and 16 error
+	vdupb_laneq_s8(arg_i8x16, 15);
+	vdupb_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s8(arg_i8x8, 0); // 0 and 7 OK; -1 and 8 error
+	vget_lane_s8(arg_i8x8, 7);
+	vget_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s8(arg_i8x16, 0); // 0 and 15 OK; -1 and 16 error
+	vgetq_lane_s8(arg_i8x16, 15);
+	vgetq_lane_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Boundary checks on the immediate (2nd argument) of the s16 vduph and vget lane calls.
+	vduph_lane_s16(arg_i16x4, 0); // 0 and 3 OK; -1 and 4 error
+	vduph_lane_s16(arg_i16x4, 3);
+	vduph_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_s16(arg_i16x8, 0); // 0 and 7 OK; -1 and 8 error
+	vduph_laneq_s16(arg_i16x8, 7);
+	vduph_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s16(arg_i16x4, 0); // 0 and 3 OK; -1 and 4 error
+	vget_lane_s16(arg_i16x4, 3);
+	vget_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s16(arg_i16x8, 0); // 0 and 7 OK; -1 and 8 error
+	vgetq_lane_s16(arg_i16x8, 7);
+	vgetq_lane_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2) { // Boundary checks on the immediate (2nd argument) of the s32 vdups and vget lane calls.
+	vdups_lane_s32(arg_i32x2, 0); // 0 and 1 OK; -1 and 2 error
+	vdups_lane_s32(arg_i32x2, 1);
+	vdups_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_s32(arg_i32x4, 0); // 0 and 3 OK; -1 and 4 error
+	vdups_laneq_s32(arg_i32x4, 3);
+	vdups_laneq_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s32(arg_i32x2, 0); // 0 and 1 OK; -1 and 2 error
+	vget_lane_s32(arg_i32x2, 1);
+	vget_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s32(arg_i32x4, 0); // 0 and 3 OK; -1 and 4 error
+	vgetq_lane_s32(arg_i32x4, 3);
+	vgetq_lane_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1) { // Boundary checks on the immediate (2nd argument) of the s64 vdupd and vget lane calls.
+	vdupd_lane_s64(arg_i64x1, 0); // 0 OK; -1 and 1 error
+	vdupd_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_s64(arg_i64x2, 0); // 0 and 1 OK; -1 and 2 error
+	vdupd_laneq_s64(arg_i64x2, 1);
+	vdupd_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s64(arg_i64x1, 0); // 0 OK; -1 and 1 error
+	vget_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s64(arg_i64x2, 0); // 0 and 1 OK; -1 and 2 error
+	vgetq_lane_s64(arg_i64x2, 1);
+	vgetq_lane_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // Boundary checks on the immediate (2nd argument) of the u8 vdupb and vget lane calls.
+	vdupb_lane_u8(arg_u8x8, 0); // 0 and 7 OK; -1 and 8 error
+	vdupb_lane_u8(arg_u8x8, 7);
+	vdupb_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_u8(arg_u8x16, 0); // 0 and 15 OK; -1 and 16 error
+	vdupb_laneq_u8(arg_u8x16, 15);
+	vdupb_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u8(arg_u8x8, 0); // 0 and 7 OK; -1 and 8 error
+	vget_lane_u8(arg_u8x8, 7);
+	vget_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u8(arg_u8x16, 0); // 0 and 15 OK; -1 and 16 error
+	vgetq_lane_u8(arg_u8x16, 15);
+	vgetq_lane_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Boundary checks on the immediate (2nd argument) of the u16 vduph and vget lane calls.
+	vduph_lane_u16(arg_u16x4, 0); // 0 and 3 OK; -1 and 4 error
+	vduph_lane_u16(arg_u16x4, 3);
+	vduph_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_u16(arg_u16x8, 0); // 0 and 7 OK; -1 and 8 error
+	vduph_laneq_u16(arg_u16x8, 7);
+	vduph_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u16(arg_u16x4, 0); // 0 and 3 OK; -1 and 4 error
+	vget_lane_u16(arg_u16x4, 3);
+	vget_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u16(arg_u16x8, 0); // 0 and 7 OK; -1 and 8 error
+	vgetq_lane_u16(arg_u16x8, 7);
+	vgetq_lane_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Boundary checks on the immediate (2nd argument) of the u32 vdups and vget lane calls.
+	vdups_lane_u32(arg_u32x2, 0); // 0 and 1 OK; -1 and 2 error
+	vdups_lane_u32(arg_u32x2, 1);
+	vdups_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_u32(arg_u32x4, 0); // 0 and 3 OK; -1 and 4 error
+	vdups_laneq_u32(arg_u32x4, 3);
+	vdups_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u32(arg_u32x2, 0); // 0 and 1 OK; -1 and 2 error
+	vget_lane_u32(arg_u32x2, 1);
+	vget_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u32(arg_u32x4, 0); // 0 and 3 OK; -1 and 4 error
+	vgetq_lane_u32(arg_u32x4, 3);
+	vgetq_lane_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) { // Boundary checks on the immediate (2nd argument) of the u64 vdupd and vget lane calls.
+	vdupd_lane_u64(arg_u64x1, 0); // 0 OK; -1 and 1 error
+	vdupd_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_u64(arg_u64x2, 0); // 0 and 1 OK; -1 and 2 error
+	vdupd_laneq_u64(arg_u64x2, 1);
+	vdupd_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u64(arg_u64x1, 0); // 0 OK; -1 and 1 error
+	vget_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u64(arg_u64x2, 0); // 0 and 1 OK; -1 and 2 error
+	vgetq_lane_u64(arg_u64x2, 1);
+	vgetq_lane_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) { // Boundary checks on the immediate (2nd argument) of the f32 vdups and vget lane calls.
+	vdups_lane_f32(arg_f32x2, 0); // 0 and 1 OK; -1 and 2 error
+	vdups_lane_f32(arg_f32x2, 1);
+	vdups_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_f32(arg_f32x4, 0); // 0 and 3 OK; -1 and 4 error
+	vdups_laneq_f32(arg_f32x4, 3);
+	vdups_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_f32(arg_f32x2, 0); // 0 and 1 OK; -1 and 2 error
+	vget_lane_f32(arg_f32x2, 1);
+	vget_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f32(arg_f32x4, 0); // 0 and 3 OK; -1 and 4 error
+	vgetq_lane_f32(arg_f32x4, 3);
+	vgetq_lane_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Boundary checks on the immediate (2nd argument) of the f64 vdupd and vget lane calls.
+	vdupd_lane_f64(arg_f64x1, 0); // 0 OK; -1 and 1 error
+	vdupd_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_f64(arg_f64x2, 0); // 0 and 1 OK; -1 and 2 error
+	vdupd_laneq_f64(arg_f64x2, 1);
+	vdupd_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_f64(arg_f64x1, 0); // 0 OK; -1 and 1 error
+	vget_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f64(arg_f64x2, 0); // 0 and 1 OK; -1 and 2 error
+	vgetq_lane_f64(arg_f64x2, 1);
+	vgetq_lane_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) { // Boundary checks on the immediate (2nd argument) of the p8 vdupb and vget lane calls.
+	vdupb_lane_p8(arg_p8x8, 0); // 0 and 7 OK; -1 and 8 error
+	vdupb_lane_p8(arg_p8x8, 7);
+	vdupb_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_p8(arg_p8x16, 0); // 0 and 15 OK; -1 and 16 error
+	vdupb_laneq_p8(arg_p8x16, 15);
+	vdupb_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_p8(arg_p8x8, 0); // 0 and 7 OK; -1 and 8 error
+	vget_lane_p8(arg_p8x8, 7);
+	vget_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p8(arg_p8x16, 0); // 0 and 15 OK; -1 and 16 error
+	vgetq_lane_p8(arg_p8x16, 15);
+	vgetq_lane_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) { // Boundary checks on the immediate (2nd argument) of the p16 vduph and vget lane calls.
+	vduph_lane_p16(arg_p16x4, 0); // 0 and 3 OK; -1 and 4 error
+	vduph_lane_p16(arg_p16x4, 3);
+	vduph_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_p16(arg_p16x8, 0); // 0 and 7 OK; -1 and 8 error
+	vduph_laneq_p16(arg_p16x8, 7);
+	vduph_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_p16(arg_p16x4, 0); // 0 and 3 OK; -1 and 4 error
+	vget_lane_p16(arg_p16x4, 3);
+	vget_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p16(arg_p16x8, 0); // 0 and 7 OK; -1 and 8 error
+	vgetq_lane_p16(arg_p16x8, 7);
+	vgetq_lane_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) { // Boundary checks on the immediate (2nd argument) of vget_lane_p64 and vgetq_lane_p64.
+	vget_lane_p64(arg_p64x1, 0); // 0 OK; -1 and 1 error
+	vget_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p64(arg_p64x2, 0); // 0 and 1 OK; -1 and 2 error
+	vgetq_lane_p64(arg_p64x2, 1);
+	vgetq_lane_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Boundary checks on the immediate (2nd argument) of vget_lane_f16 and vgetq_lane_f16.
+	vget_lane_f16(arg_f16x4, 0); // 0 and 3 OK; -1 and 4 error
+	vget_lane_f16(arg_f16x4, 3);
+	vget_lane_f16(arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f16(arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f16(arg_f16x8, 0); // 0 and 7 OK; -1 and 8 error
+	vgetq_lane_f16(arg_f16x8, 7);
+	vgetq_lane_f16(arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f16(arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
new file mode 100644
index 00000000000000..a17df47eb98eaf
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vext_s8, vextq_s8, vext_u8, vextq_u8, vext_p8, vextq_p8 are tested under
+// clang/test/Sema/aarch64-neon-ranges.c
+
+void test_extract_vector_from_a_pair_of_vectors_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4) { // Range checks on the immediate (last) argument of vext_s16 and vextq_s16.
+	vext_s16(arg_i16x4, arg_i16x4, 0);
+	vext_s16(arg_i16x4, arg_i16x4, 3);
+	vext_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s16(arg_i16x8, arg_i16x8, 0);
+	vextq_s16(arg_i16x8, arg_i16x8, 7);
+	vextq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Range checks on the immediate (last) argument of vext_s32 and vextq_s32.
+	vext_s32(arg_i32x2, arg_i32x2, 0);
+	vext_s32(arg_i32x2, arg_i32x2, 1);
+	vext_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s32(arg_i32x4, arg_i32x4, 0);
+	vextq_s32(arg_i32x4, arg_i32x4, 3);
+	vextq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1) { // Range checks on the immediate (last) argument of vext_s64 and vextq_s64.
+	vext_s64(arg_i64x1, arg_i64x1, 0);
+	vext_s64(arg_i64x1, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s64(arg_i64x1, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s64(arg_i64x2, arg_i64x2, 0);
+	vextq_s64(arg_i64x2, arg_i64x2, 1);
+	vextq_s64(arg_i64x2, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s64(arg_i64x2, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // Range checks on the immediate (last) argument of vext_u16 and vextq_u16.
+	vext_u16(arg_u16x4, arg_u16x4, 0);
+	vext_u16(arg_u16x4, arg_u16x4, 3);
+	vext_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u16(arg_u16x8, arg_u16x8, 0);
+	vextq_u16(arg_u16x8, arg_u16x8, 7);
+	vextq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Range checks on the immediate (last) argument of vext_u32 and vextq_u32.
+	vext_u32(arg_u32x2, arg_u32x2, 0);
+	vext_u32(arg_u32x2, arg_u32x2, 1);
+	vext_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u32(arg_u32x4, arg_u32x4, 0);
+	vextq_u32(arg_u32x4, arg_u32x4, 3);
+	vextq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) { // Range checks on the immediate (last) argument of vext_u64 and vextq_u64.
+	vext_u64(arg_u64x1, arg_u64x1, 0);
+	vext_u64(arg_u64x1, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u64(arg_u64x1, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u64(arg_u64x2, arg_u64x2, 0);
+	vextq_u64(arg_u64x2, arg_u64x2, 1);
+	vextq_u64(arg_u64x2, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u64(arg_u64x2, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) { // Range checks on the immediate (last) argument of vext_p64 and vextq_p64.
+	vext_p64(arg_p64x1, arg_p64x1, 0);
+	vext_p64(arg_p64x1, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p64(arg_p64x1, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p64(arg_p64x2, arg_p64x2, 0);
+	vextq_p64(arg_p64x2, arg_p64x2, 1);
+	vextq_p64(arg_p64x2, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p64(arg_p64x2, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Range checks on the immediate (last) argument of vext_f32 and vextq_f32.
+	vext_f32(arg_f32x2, arg_f32x2, 0);
+	vext_f32(arg_f32x2, arg_f32x2, 1);
+	vext_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_f32(arg_f32x4, arg_f32x4, 0);
+	vextq_f32(arg_f32x4, arg_f32x4, 3);
+	vextq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Range checks on the immediate (last) argument of vext_f64 and vextq_f64.
+	vext_f64(arg_f64x1, arg_f64x1, 0);
+	vext_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_f64(arg_f64x2, arg_f64x2, 0);
+	vextq_f64(arg_f64x2, arg_f64x2, 1);
+	vextq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_p16(poly16x8_t arg_p16x8, poly16x4_t arg_p16x4) { // Range checks on the immediate (last) argument of vext_p16 and vextq_p16.
+	vext_p16(arg_p16x4, arg_p16x4, 0);
+	vext_p16(arg_p16x4, arg_p16x4, 3);
+	vext_p16(arg_p16x4, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p16(arg_p16x4, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p16(arg_p16x8, arg_p16x8, 0);
+	vextq_p16(arg_p16x8, arg_p16x8, 7);
+	vextq_p16(arg_p16x8, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p16(arg_p16x8, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
new file mode 100644
index 00000000000000..6f83169498fb76
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +v8.2a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+// REQUIRES: aarch64-registered-target
+
+// vcvth_n_f16_s16, vcvth_n_f16_s32, vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32
+// vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16, vcvth_n_u32_f16
+// are tested under clang/test/Sema/aarch64-neon-fp16-ranges.c
+
+void test_conversions_u64(uint64_t arg_u64) { // Range checks on the immediate (last) argument of vcvth_n_f16_u64: 1 and 16 pass, 0 and 17 are diagnosed.
+	vcvth_n_f16_u64(arg_u64, 1);
+	vcvth_n_f16_u64(arg_u64, 16);
+	vcvth_n_f16_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_u64(arg_u64, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_f16(float16_t arg_f16) { // Range checks on the immediate (last) argument of vcvth_n_u64_f16: 1 and 16 pass, 0 and 17 are diagnosed.
+	vcvth_n_u64_f16(arg_f16, 1);
+	vcvth_n_u64_f16(arg_f16, 16);
+	vcvth_n_u64_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_u64_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
new file mode 100644
index 00000000000000..d31cf321d76196
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.4a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_fused_multiply_accumulate_f16(float32x2_t arg_f32x2, float32x4_t arg_f32x4, float16x4_t arg_f16x4, float16x8_t arg_f16x8) { // Range checks on the immediate (last) argument of the vfmlal/vfmlsl lane/laneq low/high f16 intrinsics and their q forms. NOTE(review): the bare "+" count on each directive presumably means "one or more matches" -- confirm.
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
new file mode 100644
index 00000000000000..6460018b744086
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +v8.2a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+// REQUIRES: aarch64-registered-target
+
+// vcvtq_n_f16_u16 is tested under clang/test/Sema/arm-mve-immediates.c
+
+void test_multiplication_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Range checks on the immediate (last) argument of the vmul, vmulq and vmulh lane/laneq f16 intrinsics.
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 0);
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 3);
+	vmul_lane_f16(arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 0);
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 3);
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 0);
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 7);
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 0);
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 7);
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulh_lane_f16(arg_f16, arg_f16x4, 0);
+	vmulh_lane_f16(arg_f16, arg_f16x4, 3);
+	vmulh_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulh_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 0);
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 7);
+	vmulh_laneq_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_multiply_extended_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Range checks on the immediate (last) argument of the vmulx, vmulxq and vmulxh lane/laneq f16 intrinsics.
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 0);
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 3);
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 0);
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 3);
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 0);
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 7);
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 0);
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 7);
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 0);
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 3);
+	vmulxh_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 0);
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 7);
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_fused_multiply_accumulate_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Range checks on the immediate (last) argument of the vfma/vfms lane/laneq f16 intrinsics, including their q and h forms.
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 0);
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 3);
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 0);
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 3);
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 0);
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 7);
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 0);
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 7);
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 0);
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 3);
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 0);
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 7);
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 0);
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 3);
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 0);
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 3);
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 0);
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 7);
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 0);
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 7);
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 0);
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 3);
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 0);
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 7);
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4) { // Range checks on the immediate (last) argument of vcvt_n_f16_s16 and vcvtq_n_f16_s16: 1 and 16 pass, 0 and 17 are diagnosed.
+	vcvt_n_f16_s16(arg_i16x4, 1);
+	vcvt_n_f16_s16(arg_i16x4, 16);
+	vcvt_n_f16_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f16_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f16_s16(arg_i16x8, 1);
+	vcvtq_n_f16_s16(arg_i16x8, 16);
+	vcvtq_n_f16_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f16_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Range checks on the immediate (last) argument of vcvt_n_f16_u16; arg_u16x8 is not used in this body.
+	vcvt_n_f16_u16(arg_u16x4, 1);
+	vcvt_n_f16_u16(arg_u16x4, 16);
+	vcvt_n_f16_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f16_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Range checks on the immediate (last) argument of vcvt_n_s16_f16, vcvtq_n_s16_f16, vcvt_n_u16_f16 and vcvtq_n_u16_f16.
+	vcvt_n_s16_f16(arg_f16x4, 1);
+	vcvt_n_s16_f16(arg_f16x4, 16);
+	vcvt_n_s16_f16(arg_f16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s16_f16(arg_f16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s16_f16(arg_f16x8, 1);
+	vcvtq_n_s16_f16(arg_f16x8, 16);
+	vcvtq_n_s16_f16(arg_f16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s16_f16(arg_f16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u16_f16(arg_f16x4, 1);
+	vcvt_n_u16_f16(arg_f16x4, 16);
+	vcvt_n_u16_f16(arg_f16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u16_f16(arg_f16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u16_f16(arg_f16x8, 1);
+	vcvtq_n_u16_f16(arg_f16x8, 16);
+	vcvtq_n_u16_f16(arg_f16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u16_f16(arg_f16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
new file mode 100644
index 00000000000000..1f03ed2264ffc6
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
@@ -0,0 +1,115 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vfma_laneq_f64, vfma_lane_f64, vfmaq_lane_f64 and vfmaq_laneq_f64
+// are tested under clang/test/Sema/aarch64-neon-ranges.c
+
+void test_fused_multiply_accumulate_f32(float32x2_t arg_f32x2, float32_t arg_f32, float32x4_t arg_f32x4) { // Range checks on the immediate (last) argument of the vfma/vfms lane/laneq f32 intrinsics, including their q and s forms.
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 0);
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 1);
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 0);
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 3);
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 0);
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 1);
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 0);
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 3);
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_fused_multiply_accumulate_f64(float64_t arg_f64, float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Range checks on the immediate (last) argument of vfmad_lane/laneq_f64, vfma_laneq_f64 and the vfms, vfmsq and vfmsd lane/laneq f64 intrinsics.
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 0);
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 0);
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 1);
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 0);
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 1);
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 0);
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 0);
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, 0);
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 0);
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 1);
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 0);
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 1);
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 0);
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 1);
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c b/clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
new file mode 100644
index 00000000000000..dd501b84bae47a
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.6a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_dot_product_s32(int8x8_t arg_i8x8, int32x2_t arg_i32x2, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8,
+						  int32x4_t arg_i32x4, int8x16_t arg_i8x16) {
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 0);
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 1);
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 0);
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 1);
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 0);
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 3);
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 0);
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 3);
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 0);
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 1);
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 0);
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 1);
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 0);
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 3);
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 0);
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 3);
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c b/clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
new file mode 100644
index 00000000000000..8c679e7e6a7d95
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_multiply_extended_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 0);
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 1);
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 0);
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 1);
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 0);
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 1);
+	vmulxs_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 0);
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 3);
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 0);
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 3);
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 0);
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 3);
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_multiply_extended_f64(float64_t arg_f64, float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, 0);
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, 0);
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxd_lane_f64(arg_f64, arg_f64x1, 0);
+	vmulxd_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxd_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 0);
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 1);
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 0);
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 1);
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 0);
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 1);
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
new file mode 100644
index 00000000000000..4c3dfd0e65a08d
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vqdmlal_lane_s32, vqdmlal_high_lane_s32, vqdmlsl_high_lane_s32, vqdmlsl_laneq_s32,
+// vqdmlsls_laneq_s32 and vqdmlsl_high_laneq_s32 are tested under arm-neon-range-checks.c.
+
+void test_saturating_multiply_accumulate_s16(int16x4_t arg_i16x4, int32_t arg_i32, int16_t arg_i16,
+											 int32x4_t arg_i32x4, int16x8_t arg_i16x8) {
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 0);
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 3);
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 0);
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 7);
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 0);
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 3);
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 0);
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 7);
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_accumulate_s32(int32x2_t arg_i32x2, int64x2_t arg_i64x2, int32_t arg_i32,
+											 int32x4_t arg_i32x4, int64_t arg_i64) {
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 0);
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 0);
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 3);
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 0);
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
new file mode 100644
index 00000000000000..a72d1950389dd5
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
@@ -0,0 +1,136 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// clang/Sema/arm-neon-range-checks.c includes tests for:
+// vqdmull_lane_s32, vqdmull_laneq_s32, vqdmull_high_lane_s32, vqdmull_high_laneq_s32,
+// vqdmulh_lane_s32, vqdmulhq_lane_s32, vqdmulh_laneq_s32, vqdmulhq_laneq_s32, vqrdmulh_lane_s32,
+// vqrdmulhq_lane_s32, vqrdmulh_laneq_s32, vqrdmulhq_laneq_s32
+
+void test_saturating_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8, int16_t arg_i16) {
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_by_scalar_and_widen_s32(int32x4_t arg_i32x4, int32_t arg_i32, int32x2_t arg_i32x2) {
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 0);
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 1);
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 0);
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 0);
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c b/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
new file mode 100644
index 00000000000000..ea1fcd5dc03221
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
@@ -0,0 +1,277 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vdup_lane_s32, vdupq_lane_s32, vdup_laneq_s32 and vdupq_laneq_s32 are tested
+// under clang/test/CodeGen/arm-neon-range-checks.c
+
+void test_set_all_lanes_to_the_same_value_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vdup_lane_s8(arg_i8x8, 0);
+	vdup_lane_s8(arg_i8x8, 7);
+	vdup_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s8(arg_i8x8, 0);
+	vdupq_lane_s8(arg_i8x8, 7);
+	vdupq_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s8(arg_i8x16, 0);
+	vdup_laneq_s8(arg_i8x16, 15);
+	vdup_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s8(arg_i8x16, 0);
+	vdupq_laneq_s8(arg_i8x16, 15);
+	vdupq_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vdup_lane_s16(arg_i16x4, 0);
+	vdup_lane_s16(arg_i16x4, 3);
+	vdup_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s16(arg_i16x4, 0);
+	vdupq_lane_s16(arg_i16x4, 3);
+	vdupq_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s16(arg_i16x8, 0);
+	vdup_laneq_s16(arg_i16x8, 7);
+	vdup_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s16(arg_i16x8, 0);
+	vdupq_laneq_s16(arg_i16x8, 7);
+	vdupq_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+
+void test_set_all_lanes_to_the_same_value_s64(int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
+	vdup_lane_s64(arg_i64x1, 0);
+	vdup_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s64(arg_i64x1, 0);
+	vdupq_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s64(arg_i64x2, 0);
+	vdup_laneq_s64(arg_i64x2, 1);
+	vdup_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s64(arg_i64x2, 0);
+	vdupq_laneq_s64(arg_i64x2, 1);
+	vdupq_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vdup_lane_u8(arg_u8x8, 0);
+	vdup_lane_u8(arg_u8x8, 7);
+	vdup_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u8(arg_u8x8, 0);
+	vdupq_lane_u8(arg_u8x8, 7);
+	vdupq_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u8(arg_u8x16, 0);
+	vdup_laneq_u8(arg_u8x16, 15);
+	vdup_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u8(arg_u8x16, 0);
+	vdupq_laneq_u8(arg_u8x16, 15);
+	vdupq_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vdup_lane_u16(arg_u16x4, 0);
+	vdup_lane_u16(arg_u16x4, 3);
+	vdup_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u16(arg_u16x4, 0);
+	vdupq_lane_u16(arg_u16x4, 3);
+	vdupq_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u16(arg_u16x8, 0);
+	vdup_laneq_u16(arg_u16x8, 7);
+	vdup_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u16(arg_u16x8, 0);
+	vdupq_laneq_u16(arg_u16x8, 7);
+	vdupq_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u32(uint32x4_t arg_u32x4, uint32x2_t arg_u32x2) {
+	vdup_lane_u32(arg_u32x2, 0);
+	vdup_lane_u32(arg_u32x2, 1);
+	vdup_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u32(arg_u32x2, 0);
+	vdupq_lane_u32(arg_u32x2, 1);
+	vdupq_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u32(arg_u32x4, 0);
+	vdup_laneq_u32(arg_u32x4, 3);
+	vdup_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u32(arg_u32x4, 0);
+	vdupq_laneq_u32(arg_u32x4, 3);
+	vdupq_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) {
+	vdup_lane_u64(arg_u64x1, 0);
+	vdup_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u64(arg_u64x1, 0);
+	vdupq_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u64(arg_u64x2, 0);
+	vdup_laneq_u64(arg_u64x2, 1);
+	vdup_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u64(arg_u64x2, 0);
+	vdupq_laneq_u64(arg_u64x2, 1);
+	vdupq_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p64(poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) {
+	vdup_lane_p64(arg_p64x1, 0);
+	vdup_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p64(arg_p64x1, 0);
+	vdupq_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p64(arg_p64x2, 0);
+	vdup_laneq_p64(arg_p64x2, 1);
+	vdup_laneq_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p64(arg_p64x2, 0);
+	vdupq_laneq_p64(arg_p64x2, 1);
+	vdupq_laneq_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vdup_lane_f32(arg_f32x2, 0);
+	vdup_lane_f32(arg_f32x2, 1);
+	vdup_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_f32(arg_f32x2, 0);
+	vdupq_lane_f32(arg_f32x2, 1);
+	vdupq_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_f32(arg_f32x4, 0);
+	vdup_laneq_f32(arg_f32x4, 3);
+	vdup_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_f32(arg_f32x4, 0);
+	vdupq_laneq_f32(arg_f32x4, 3);
+	vdupq_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vdup_lane_p8(arg_p8x8, 0);
+	vdup_lane_p8(arg_p8x8, 7);
+	vdup_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p8(arg_p8x8, 0);
+	vdupq_lane_p8(arg_p8x8, 7);
+	vdupq_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p8(arg_p8x16, 0);
+	vdup_laneq_p8(arg_p8x16, 15);
+	vdup_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p8(arg_p8x16, 0);
+	vdupq_laneq_p8(arg_p8x16, 15);
+	vdupq_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) {
+	vdup_lane_p16(arg_p16x4, 0);
+	vdup_lane_p16(arg_p16x4, 3);
+	vdup_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p16(arg_p16x4, 0);
+	vdupq_lane_p16(arg_p16x4, 3);
+	vdupq_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p16(arg_p16x8, 0);
+	vdup_laneq_p16(arg_p16x8, 7);
+	vdup_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p16(arg_p16x8, 0);
+	vdupq_laneq_p16(arg_p16x8, 7);
+	vdupq_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vdup_lane_f64(arg_f64x1, 0);
+	vdup_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_f64(arg_f64x1, 0);
+	vdupq_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_f64(arg_f64x2, 0);
+	vdup_laneq_f64(arg_f64x2, 1);
+	vdup_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_f64(arg_f64x2, 0);
+	vdupq_laneq_f64(arg_f64x2, 1);
+	vdupq_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c b/clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
new file mode 100644
index 00000000000000..3ab077ed562875
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
@@ -0,0 +1,162 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vsetq_lane_u8, vsetq_lane_u16, vsetq_lane_u32, vsetq_lane_u64 are
+// tested under clang/test/Sema/arm-mve-immediates.c
+
+void test_set_vector_lane_u8(uint8x16_t arg_u8x16, uint8_t arg_u8, uint8x8_t arg_u8x8) {
+	vset_lane_u8(arg_u8, arg_u8x8, 0);
+	vset_lane_u8(arg_u8, arg_u8x8, 7);
+	vset_lane_u8(arg_u8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u8(arg_u8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u16(uint16x4_t arg_u16x4, uint16_t arg_u16, uint16x8_t arg_u16x8) {
+	vset_lane_u16(arg_u16, arg_u16x4, 0);
+	vset_lane_u16(arg_u16, arg_u16x4, 3);
+	vset_lane_u16(arg_u16, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u16(arg_u16, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4, uint32_t arg_u32) {
+	vset_lane_u32(arg_u32, arg_u32x2, 0);
+	vset_lane_u32(arg_u32, arg_u32x2, 1);
+	vset_lane_u32(arg_u32, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u32(arg_u32, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u64(uint64x2_t arg_u64x2, uint64x1_t arg_u64x1, uint64_t arg_u64) {
+	vset_lane_u64(arg_u64, arg_u64x1, 0);
+	vset_lane_u64(arg_u64, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u64(arg_u64, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_p64(poly64_t arg_p64, poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) {
+	vset_lane_p64(arg_p64, arg_p64x1, 0);
+	vset_lane_p64(arg_p64, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p64(arg_p64, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p64(arg_p64, arg_p64x2, 0);
+	vsetq_lane_p64(arg_p64, arg_p64x2, 1);
+	vsetq_lane_p64(arg_p64, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p64(arg_p64, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8, int8_t arg_i8) {
+	vset_lane_s8(arg_i8, arg_i8x8, 0);
+	vset_lane_s8(arg_i8, arg_i8x8, 7);
+	vset_lane_s8(arg_i8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s8(arg_i8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s8(arg_i8, arg_i8x16, 0);
+	vsetq_lane_s8(arg_i8, arg_i8x16, 15);
+	vsetq_lane_s8(arg_i8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s8(arg_i8, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s16(int16x4_t arg_i16x4, int16_t arg_i16, int16x8_t arg_i16x8) {
+	vset_lane_s16(arg_i16, arg_i16x4, 0);
+	vset_lane_s16(arg_i16, arg_i16x4, 3);
+	vset_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s16(arg_i16, arg_i16x8, 0);
+	vsetq_lane_s16(arg_i16, arg_i16x8, 7);
+	vsetq_lane_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s32(int32_t arg_i32, int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vset_lane_s32(arg_i32, arg_i32x2, 0);
+	vset_lane_s32(arg_i32, arg_i32x2, 1);
+	vset_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s32(arg_i32, arg_i32x4, 0);
+	vsetq_lane_s32(arg_i32, arg_i32x4, 3);
+	vsetq_lane_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vset_lane_s64(arg_i64, arg_i64x1, 0);
+	vset_lane_s64(arg_i64, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s64(arg_i64, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s64(arg_i64, arg_i64x2, 0);
+	vsetq_lane_s64(arg_i64, arg_i64x2, 1);
+	vsetq_lane_s64(arg_i64, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s64(arg_i64, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_p8(poly8_t arg_p8, poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vset_lane_p8(arg_p8, arg_p8x8, 0);
+	vset_lane_p8(arg_p8, arg_p8x8, 7);
+	vset_lane_p8(arg_p8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p8(arg_p8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p8(arg_p8, arg_p8x16, 0);
+	vsetq_lane_p8(arg_p8, arg_p8x16, 15);
+	vsetq_lane_p8(arg_p8, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p8(arg_p8, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_p16(poly16x4_t arg_p16x4, poly16_t arg_p16, poly16x8_t arg_p16x8) {
+	vset_lane_p16(arg_p16, arg_p16x4, 0);
+	vset_lane_p16(arg_p16, arg_p16x4, 3);
+	vset_lane_p16(arg_p16, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p16(arg_p16, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p16(arg_p16, arg_p16x8, 0);
+	vsetq_lane_p16(arg_p16, arg_p16x8, 7);
+	vsetq_lane_p16(arg_p16, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p16(arg_p16, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4, float16_t arg_f16) {
+	vset_lane_f16(arg_f16, arg_f16x4, 0);
+	vset_lane_f16(arg_f16, arg_f16x4, 3);
+	vset_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f16(arg_f16, arg_f16x8, 0);
+	vsetq_lane_f16(arg_f16, arg_f16x8, 7);
+	vsetq_lane_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4, float32_t arg_f32) {
+	vset_lane_f32(arg_f32, arg_f32x2, 0);
+	vset_lane_f32(arg_f32, arg_f32x2, 1);
+	vset_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f32(arg_f32, arg_f32x4, 0);
+	vsetq_lane_f32(arg_f32, arg_f32x4, 3);
+	vsetq_lane_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f64(float64x1_t arg_f64x1, float64x2_t arg_f64x2, float64_t arg_f64) {
+	vset_lane_f64(arg_f64, arg_f64x1, 0);
+	vset_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f64(arg_f64, arg_f64x2, 0);
+	vsetq_lane_f64(arg_f64, arg_f64x2, 1);
+	vsetq_lane_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c b/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
new file mode 100644
index 00000000000000..9da02914966295
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +v8.1a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vqrdmlah_lane_s32, vqrdmlahq_lane_s32, vqrdmlah_laneq_s32, vqrdmlahq_laneq_s32, 
+// vqrdmlsh_lane_s32 are tested under clang/test/CodeGen/arm-neon-range-checks.c
+
+void test_saturating_multiply_accumulate_by_element_s16(int16x8_t arg_i16x8, int16_t arg_i16, int16x4_t arg_i16x4) {
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 0);
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 3);
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 0);
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 7);
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 0);
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 3);
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 0);
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 7);
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_accumulate_by_element_s32(int32x4_t arg_i32x4, int32_t arg_i32, int32x2_t arg_i32x2) {
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 0);
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 1);
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 0);
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 3);
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 0);
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 1);
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 0);
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 3);
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
new file mode 100644
index 00000000000000..a11535acf0b4a7
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
@@ -0,0 +1,525 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+// The majority of the s8, s16, s32 and s64 variants are tested under
+// clang/test/Sema/aarch64-neon-ranges.c
+
+void test_vector_load_s64(int64x1x4_t arg_i64x1x4, int64x2_t arg_i64x2, int64_t *arg_i64_ptr,
+						  int64x2x3_t arg_i64x2x3, int64x1x2_t arg_i64x1x2, int64x2x2_t arg_i64x2x2,
+						  int64x2x4_t arg_i64x2x4, int64x1x3_t arg_i64x1x3, int64x1_t arg_i64x1) {
+	
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u8(uint8x8x2_t arg_u8x8x2, uint8x16x2_t arg_u8x16x2, uint8x8x4_t arg_u8x8x4,
+						uint8x8_t arg_u8x8, uint8x8x3_t arg_u8x8x3, uint8x16_t arg_u8x16,
+						uint8x16x4_t arg_u8x16x4, uint8_t *arg_u8_ptr, uint8x16x3_t arg_u8x16x3) {
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 0);
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 7);
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 0);
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 15);
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 0);
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 7);
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 0);
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 15);
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 0);
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 7);
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 0);
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 15);
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 0);
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 7);
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 0);
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 15);
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u16(uint16x8x2_t arg_u16x8x2, uint16x8x4_t arg_u16x8x4, uint16x4x4_t arg_u16x4x4,
+						  uint16x4x2_t arg_u16x4x2, uint16x8_t arg_u16x8, uint16_t *arg_u16_ptr,
+						  uint16x8x3_t arg_u16x8x3, uint16x4_t arg_u16x4, uint16x4x3_t arg_u16x4x3) {
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 0);
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 3);
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 0);
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 7);
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 0);
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 3);
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 0);
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 7);
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 0);
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 3);
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 0);
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 7);
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 0);
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 3);
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 0);
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 7);
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u32(uint32x2x3_t arg_u32x2x3, uint32x2_t arg_u32x2, uint32x2x4_t arg_u32x2x4,
+						  uint32x4_t arg_u32x4, uint32x4x2_t arg_u32x4x2, uint32x2x2_t arg_u32x2x2,
+						  uint32_t *arg_u32_ptr, uint32x4x4_t arg_u32x4x4, uint32x4x3_t arg_u32x4x3) { // lane index: 0..1 accepted for 2-lane, 0..3 for 4-lane u32 vectors
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 0);
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 1);
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 0);
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 3);
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 0);
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 1);
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 0);
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 3);
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 0);
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 1);
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 0);
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 3);
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 0);
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 1);
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 0);
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 3);
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u64(uint64x2x2_t arg_u64x2x2, uint64x1x2_t arg_u64x1x2, uint64x2x3_t arg_u64x2x3,
+						  uint64x1_t arg_u64x1, uint64x1x4_t arg_u64x1x4, uint64x1x3_t arg_u64x1x3,
+						  uint64_t *arg_u64_ptr, uint64x2_t arg_u64x2, uint64x2x4_t arg_u64x2x4) {
+	// Lane immediates must lie in [0, lanes - 1]; vld1_lane_u64 indexes a 1-lane vector, so only 0 is valid.
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld1q_lane_u64 indexes a 2-lane vector: valid lanes are 0..1.
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vldap1_lane_u64 indexes a 1-lane vector: 0 is the only valid lane.
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vldap1q_lane_u64 indexes a 2-lane vector: valid lanes are 0..1.
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vstl1_lane_u64 indexes a 1-lane vector: 0 is the only valid lane.
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vstl1q_lane_u64 indexes a 2-lane vector: valid lanes are 0..1.
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2_lane_u64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, 0);
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2q_lane_u64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 0);
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 1);
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3_lane_u64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, 0);
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3q_lane_u64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 0);
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 1);
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4_lane_u64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, 0);
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4q_lane_u64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 0);
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 1);
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_load_p64(poly64_t *arg_p64_ptr, poly64x2x2_t arg_p64x2x2, poly64x1x2_t arg_p64x1x2,
+						  poly64x2x4_t arg_p64x2x4, poly64x1x3_t arg_p64x1x3, poly64x2x3_t arg_p64x2x3,
+						  poly64x1_t arg_p64x1, poly64x2_t arg_p64x2, poly64x1x4_t arg_p64x1x4) {
+	// Lane immediates must lie in [0, lanes - 1]; vld1_lane_p64 indexes a 1-lane vector, so only 0 is valid.
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld1q_lane_p64 indexes a 2-lane vector: valid lanes are 0..1.
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vldap1_lane_p64 indexes a 1-lane vector: 0 is the only valid lane.
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vldap1q_lane_p64 indexes a 2-lane vector: valid lanes are 0..1.
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vstl1_lane_p64 indexes a 1-lane vector: 0 is the only valid lane.
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vstl1q_lane_p64 indexes a 2-lane vector: valid lanes are 0..1.
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2_lane_p64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, 0);
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2q_lane_p64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 0);
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 1);
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3_lane_p64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, 0);
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3q_lane_p64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 0);
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 1);
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4_lane_p64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, 0);
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4q_lane_p64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 0);
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 1);
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_load_f16(float16_t *arg_f16_ptr, float16x8_t arg_f16x8, float16x8x2_t arg_f16x8x2,
+						  float16x8x3_t arg_f16x8x3, float16x4x4_t arg_f16x4x4, float16x8x4_t arg_f16x8x4,
+						  float16x4x2_t arg_f16x4x2, float16x4_t arg_f16x4, float16x4x3_t arg_f16x4x3) {
+	// Lane immediates must lie in [0, lanes - 1]; vld1_lane_f16 indexes a 4-lane vector, so 0..3 are valid.
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 0);
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 3);
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld1q_lane_f16 indexes an 8-lane vector: valid lanes are 0..7.
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 0);
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 7);
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2_lane_f16 indexes 4-lane vectors: valid lanes are 0..3.
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 0);
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 3);
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2q_lane_f16 indexes 8-lane vectors: valid lanes are 0..7.
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 0);
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 7);
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3_lane_f16 indexes 4-lane vectors: valid lanes are 0..3.
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 0);
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 3);
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3q_lane_f16 indexes 8-lane vectors: valid lanes are 0..7.
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 0);
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 7);
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4_lane_f16 indexes 4-lane vectors: valid lanes are 0..3.
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 0);
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 3);
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4q_lane_f16 indexes 8-lane vectors: valid lanes are 0..7.
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 0);
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 7);
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_load_f32(float32_t *arg_f32_ptr, float32x4x3_t arg_f32x4x3, float32x2x4_t arg_f32x2x4,
+						  float32x4x4_t arg_f32x4x4, float32x2x3_t arg_f32x2x3, float32x2x2_t arg_f32x2x2,
+						  float32x4x2_t arg_f32x4x2, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	// Lane immediates must lie in [0, lanes - 1]; vld1_lane_f32 indexes a 2-lane vector, so 0..1 are valid.
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 0);
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 1);
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld1q_lane_f32 indexes a 4-lane vector: valid lanes are 0..3.
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 0);
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 3);
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2_lane_f32 indexes 2-lane vectors: valid lanes are 0..1.
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 0);
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 1);
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2q_lane_f32 indexes 4-lane vectors: valid lanes are 0..3.
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 0);
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 3);
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3_lane_f32 indexes 2-lane vectors: valid lanes are 0..1.
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 0);
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 1);
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3q_lane_f32 indexes 4-lane vectors: valid lanes are 0..3.
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 0);
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 3);
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4_lane_f32 indexes 2-lane vectors: valid lanes are 0..1.
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 0);
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 1);
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4q_lane_f32 indexes 4-lane vectors: valid lanes are 0..3.
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 0);
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 3);
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_load_p8(poly8x16_t arg_p8x16, poly8x8x2_t arg_p8x8x2, poly8x16x4_t arg_p8x16x4,
+						 poly8_t *arg_p8_ptr, poly8x8_t arg_p8x8, poly8x8x4_t arg_p8x8x4,
+						 poly8x16x2_t arg_p8x16x2, poly8x8x3_t arg_p8x8x3, poly8x16x3_t arg_p8x16x3) {
+	// Lane immediates must lie in [0, lanes - 1]; vld1_lane_p8 indexes an 8-lane vector, so 0..7 are valid.
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 0);
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 7);
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld1q_lane_p8 indexes a 16-lane vector: valid lanes are 0..15.
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 0);
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 15);
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2_lane_p8 indexes 8-lane vectors: valid lanes are 0..7.
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 0);
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 7);
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2q_lane_p8 indexes 16-lane vectors: valid lanes are 0..15.
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 0);
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 15);
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3_lane_p8 indexes 8-lane vectors: valid lanes are 0..7.
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 0);
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 7);
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3q_lane_p8 indexes 16-lane vectors: valid lanes are 0..15.
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 0);
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 15);
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4_lane_p8 indexes 8-lane vectors: valid lanes are 0..7.
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 0);
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 7);
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4q_lane_p8 indexes 16-lane vectors: valid lanes are 0..15.
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 0);
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 15);
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_load_p16(poly16x8x4_t arg_p16x8x4, poly16x8_t arg_p16x8, poly16x4x4_t arg_p16x4x4,
+						  poly16x8x3_t arg_p16x8x3, poly16_t *arg_p16_ptr, poly16x4_t arg_p16x4,
+						  poly16x8x2_t arg_p16x8x2, poly16x4x2_t arg_p16x4x2, poly16x4x3_t arg_p16x4x3) {
+	// Lane immediates must lie in [0, lanes - 1]; vld1_lane_p16 indexes a 4-lane vector, so 0..3 are valid.
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 0);
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 3);
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld1q_lane_p16 indexes an 8-lane vector: valid lanes are 0..7.
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 0);
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 7);
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2_lane_p16 indexes 4-lane vectors: valid lanes are 0..3.
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 0);
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 3);
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2q_lane_p16 indexes 8-lane vectors: valid lanes are 0..7.
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 0);
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 7);
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3_lane_p16 indexes 4-lane vectors: valid lanes are 0..3.
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 0);
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 3);
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3q_lane_p16 indexes 8-lane vectors: valid lanes are 0..7.
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 0);
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 7);
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4_lane_p16 indexes 4-lane vectors: valid lanes are 0..3.
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 0);
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 3);
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4q_lane_p16 indexes 8-lane vectors: valid lanes are 0..7.
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 0);
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 7);
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_load_f64(float64x1_t arg_f64x1, float64x1x2_t arg_f64x1x2, float64_t* arg_f64_ptr,
+						  float64x2x3_t arg_f64x2x3, float64x2x4_t arg_f64x2x4, float64x2x2_t arg_f64x2x2,
+						  float64x2_t arg_f64x2, float64x1x3_t arg_f64x1x3, float64x1x4_t arg_f64x1x4) {
+	// Lane immediates must lie in [0, lanes - 1]; vld1_lane_f64 indexes a 1-lane vector, so only 0 is valid.
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld1q_lane_f64 indexes a 2-lane vector: valid lanes are 0..1.
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vldap1_lane_f64 indexes a 1-lane vector: 0 is the only valid lane.
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vldap1q_lane_f64 indexes a 2-lane vector: valid lanes are 0..1.
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vstl1_lane_f64 indexes a 1-lane vector: 0 is the only valid lane.
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vstl1q_lane_f64 indexes a 2-lane vector: valid lanes are 0..1.
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2_lane_f64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, 0);
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld2q_lane_f64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 0);
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 1);
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3_lane_f64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, 0);
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld3q_lane_f64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 0);
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 1);
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4_lane_f64 indexes 1-lane vectors: 0 is the only valid lane.
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, 0);
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vld4q_lane_f64 indexes 2-lane vectors: valid lanes are 0..1.
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 0);
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 1);
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
new file mode 100644
index 00000000000000..a306200c62f1c0
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vmla_lane_s32, vmlaq_lane_s32, vmla_laneq_s32, vmlaq_laneq_s32, vmlal_lane_s32,
+// vmlal_high_lane_s32, vmlal_laneq_s32 and vmlal_high_laneq_s32 are tested under
+// clang/test/CodeGen/arm-neon-range-checks.c
+
+void test_vector_multiply_accumulate_by_scalar_s16(int32x4_t arg_i32x4, int16x8_t arg_i16x8, int16x4_t arg_i16x4) {
+	// The lane immediate indexes the last vector operand; vmla_lane_s16 passes a 4-lane int16x4_t, so 0..3 are valid.
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlaq_lane_s16: last operand is a 4-lane int16x4_t, so valid lanes are 0..3.
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmla_laneq_s16: last operand is an 8-lane int16x8_t, so valid lanes are 0..7.
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlaq_laneq_s16: last operand is an 8-lane int16x8_t, so valid lanes are 0..7.
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_lane_s16: last operand is a 4-lane int16x4_t, so valid lanes are 0..3.
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_high_lane_s16: last operand is a 4-lane int16x4_t, so valid lanes are 0..3.
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_laneq_s16: last operand is an 8-lane int16x8_t, so valid lanes are 0..7.
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_high_laneq_s16: last operand is an 8-lane int16x8_t, so valid lanes are 0..7.
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_multiply_accumulate_by_scalar_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8, uint32x4_t arg_u32x4) {
+	// The lane immediate indexes the last vector operand; vmla_lane_u16 passes a 4-lane uint16x4_t, so 0..3 are valid.
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlaq_lane_u16: last operand is a 4-lane uint16x4_t, so valid lanes are 0..3.
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 0);
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 3);
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmla_laneq_u16: last operand is an 8-lane uint16x8_t, so valid lanes are 0..7.
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 0);
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 7);
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlaq_laneq_u16: last operand is an 8-lane uint16x8_t, so valid lanes are 0..7.
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 0);
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 7);
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_lane_u16: last operand is a 4-lane uint16x4_t, so valid lanes are 0..3.
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 0);
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 3);
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_high_lane_u16: last operand is a 4-lane uint16x4_t, so valid lanes are 0..3.
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 0);
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 3);
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_laneq_u16: last operand is an 8-lane uint16x8_t, so valid lanes are 0..7.
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 0);
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 7);
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vmlal_high_laneq_u16: last operand is an 8-lane uint16x8_t, so valid lanes are 0..7.
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 0);
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 7);
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_multiply_accumulate_by_scalar_u32(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Lane-immediate bounds for the u32 vmla/vmlal lane forms: a uint32x2_t lane source takes 0 and 1, a uint32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 0);
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 1);
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 0);
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 1);
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 0);
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 3);
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 0);
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 3);
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 0);
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 1);
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 0);
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 1);
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 0);
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 3);
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 0);
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 3);
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) { // Lane-immediate bounds for the f32 vmla lane forms: a float32x2_t lane source takes 0 and 1, a float32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
new file mode 100644
index 00000000000000..1363a4ec453347
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
@@ -0,0 +1,78 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vmull_lane_s32, vmull_high_lane_s32, vmull_laneq_s32, vmull_high_laneq_s32
+// are tested under clang/test/CodeGen/arm-neon-range-checks.c
+
+void test_vector_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Lane-immediate bounds for the s16 vmull lane forms: an int16x4_t lane source takes 0 and 3, an int16x8_t lane source takes 0 and 7; -1 and the lane count must be diagnosed.
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vmull_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+
+void test_vector_multiply_by_scalar_and_widen_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Lane-immediate bounds for the u16 vmull lane forms: a uint16x4_t lane source takes 0 and 3, a uint16x8_t lane source takes 0 and 7; -1 and the lane count must be diagnosed.
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 0);
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 3);
+	vmull_lane_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 0);
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 3);
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 0);
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 7);
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 0);
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 7);
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_and_widen_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Lane-immediate bounds for the u32 vmull lane forms: a uint32x2_t lane source takes 0 and 1, a uint32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 0);
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 1);
+	vmull_lane_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 0);
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 1);
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 0);
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 3);
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 0);
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 3);
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
new file mode 100644
index 00000000000000..8d762f32b35af5
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
@@ -0,0 +1,158 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vmul_lane_f64, vmul_laneq_f64
+// are tested under clang/test/aarch64-neon-ranges.c
+
+void test_vector_multiply_by_scalar_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Lane-immediate bounds for the s16 vmul lane forms: an int16x4_t lane source takes 0 and 3, an int16x8_t lane source takes 0 and 7; -1 and the lane count must be diagnosed.
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vmul_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Lane-immediate bounds for the s32 vmul lane forms: an int32x2_t lane source takes 0 and 1, an int32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vmul_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Lane-immediate bounds for the u16 vmul lane forms: a uint16x4_t lane source takes 0 and 3, a uint16x8_t lane source takes 0 and 7; -1 and the lane count must be diagnosed.
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 0);
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 3);
+	vmul_lane_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 0);
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 3);
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 0);
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 7);
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 0);
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 7);
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Lane-immediate bounds for the u32 vmul lane forms: a uint32x2_t lane source takes 0 and 1, a uint32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 0);
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 1);
+	vmul_lane_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 0);
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 1);
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 0);
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 3);
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 0);
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 3);
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Lane-immediate bounds for the f32 vmul lane forms (vector and scalar vmuls): a float32x2_t lane source takes 0 and 1, a float32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 0);
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 1);
+	vmul_lane_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 0);
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 1);
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuls_lane_f32(arg_f32, arg_f32x2, 0);
+	vmuls_lane_f32(arg_f32, arg_f32x2, 1);
+	vmuls_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuls_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 0);
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 3);
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 0);
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 3);
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 0);
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 3);
+	vmuls_laneq_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_f64(float64x2_t arg_f64x2, float64_t arg_f64, float64x1_t arg_f64x1) { // Lane-immediate bounds for the f64 vmul lane forms (vector and scalar vmuld): a float64x1_t lane source takes only 0, a float64x2_t lane source takes 0 and 1; -1 and the lane count must be diagnosed.
+	vmul_lane_f64(arg_f64x1, arg_f64x1, 0);
+	vmul_lane_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, 0);
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuld_lane_f64(arg_f64, arg_f64x1, 0);
+	vmuld_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuld_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 0);
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 1);
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 0);
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 1);
+	vmuld_laneq_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
new file mode 100644
index 00000000000000..81194427b3b792
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// clang/test/CodeGen/arm-neon-range-checks.c includes tests for:
+// vmls_lane_s32, vmlsq_lane_s32, vmls_laneq_s32, vmlsq_laneq_s32
+// vmlsl_lane_s32, vmlsl_high_lane_s32, vmlsl_laneq_s32, vmlsl_high_laneq_s32
+
+void test_vector_multiply_subtract_by_scalar_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4, int32x4_t arg_i32x4) { // Lane-immediate bounds for the s16 vmls/vmlsl lane forms: an int16x4_t lane source takes 0 and 3, an int16x8_t lane source takes 0 and 7; -1 and the lane count must be diagnosed.
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4, uint32x4_t arg_u32x4) { // Lane-immediate bounds for the u16 vmls/vmlsl lane forms: a uint16x4_t lane source takes 0 and 3, a uint16x8_t lane source takes 0 and 7; -1 and the lane count must be diagnosed.
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 0);
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 3);
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 0);
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 7);
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 0);
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 7);
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 0);
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 3);
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 0);
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 3);
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 0);
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 7);
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 0);
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 7);
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_u32(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Lane-immediate bounds for the u32 vmls/vmlsl lane forms: a uint32x2_t lane source takes 0 and 1, a uint32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 0);
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 1);
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 0);
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 1);
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 0);
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 3);
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 0);
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 3);
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 0);
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 1);
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 0);
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 1);
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 0);
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 3);
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 0);
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 3);
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) { // Lane-immediate bounds for the f32 vmls lane forms: a float32x2_t lane source takes 0 and 1, a float32x4_t lane source takes 0 and 3; -1 and the lane count must be diagnosed.
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
new file mode 100644
index 00000000000000..1def72fc843d9b
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
@@ -0,0 +1,542 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// Widening left-shifts should have a range of 0..(sizeinbits(arg)-1); this range has had
+// to be weakened to 0..((sizeinbits(arg)*2)-1) due to a use of vshll_n_s16 with an
+// out-of-bounds immediate in the definition of vcvt_f32_bf16. As a result, the upper bounds
+// of widening left-shift intrinsics are not currently tested here.
+
+void test_vector_shift_left_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vshl_n_s8(arg_i8x8, 0);
+	vshl_n_s8(arg_i8x8, 7);
+	vshl_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s8(arg_i8x16, 0);
+	vshlq_n_s8(arg_i8x16, 7);
+	vshlq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vshl_n_s16(arg_i16x4, 0);
+	vshl_n_s16(arg_i16x4, 15);
+	vshl_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s16(arg_i16x8, 0);
+	vshlq_n_s16(arg_i16x8, 15);
+	vshlq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vshl_n_s32(arg_i32x2, 0);
+	vshl_n_s32(arg_i32x2, 31);
+	vshl_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s32(arg_i32x4, 0);
+	vshlq_n_s32(arg_i32x4, 31);
+	vshlq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vshl_n_s64(arg_i64x1, 0);
+	vshl_n_s64(arg_i64x1, 63);
+	vshl_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s64(arg_i64x2, 0);
+	vshlq_n_s64(arg_i64x2, 63);
+	vshlq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshld_n_s64(arg_i64, 0);
+	vshld_n_s64(arg_i64, 63);
+	vshld_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshld_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vshl_n_u8(arg_u8x8, 0);
+	vshl_n_u8(arg_u8x8, 7);
+	vshl_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u8(arg_u8x16, 0);
+	vshlq_n_u8(arg_u8x16, 7);
+	vshlq_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u8(arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vshl_n_u16(arg_u16x4, 0);
+	vshl_n_u16(arg_u16x4, 15);
+	vshl_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u16(arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u16(arg_u16x8, 0);
+	vshlq_n_u16(arg_u16x8, 15);
+	vshlq_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u16(arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vshl_n_u32(arg_u32x2, 0);
+	vshl_n_u32(arg_u32x2, 31);
+	vshl_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u32(arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u32(arg_u32x4, 0);
+	vshlq_n_u32(arg_u32x4, 31);
+	vshlq_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u32(arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) {
+	vshl_n_u64(arg_u64x1, 0);
+	vshl_n_u64(arg_u64x1, 63);
+	vshl_n_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u64(arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u64(arg_u64x2, 0);
+	vshlq_n_u64(arg_u64x2, 63);
+	vshlq_n_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u64(arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshld_n_u64(arg_u64, 0);
+	vshld_n_u64(arg_u64, 63);
+	vshld_n_u64(arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshld_n_u64(arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16, int8_t arg_i8) {
+	vqshl_n_s8(arg_i8x8, 0);
+	vqshl_n_s8(arg_i8x8, 7);
+	vqshl_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s8(arg_i8x16, 0);
+	vqshlq_n_s8(arg_i8x16, 7);
+	vqshlq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlb_n_s8(arg_i8, 0);
+	vqshlb_n_s8(arg_i8, 7);
+	vqshlb_n_s8(arg_i8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlb_n_s8(arg_i8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s8(arg_i8x8, 0);
+	vqshlu_n_s8(arg_i8x8, 7);
+	vqshlu_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s8(arg_i8x16, 0);
+	vqshluq_n_s8(arg_i8x16, 7);
+	vqshluq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlub_n_s8(arg_i8, 0);
+	vqshlub_n_s8(arg_i8, 7);
+	vqshlub_n_s8(arg_i8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlub_n_s8(arg_i8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s16(int16x4_t arg_i16x4, int16_t arg_i16, int16x8_t arg_i16x8) {
+	vqshl_n_s16(arg_i16x4, 0);
+	vqshl_n_s16(arg_i16x4, 15);
+	vqshl_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s16(arg_i16x8, 0);
+	vqshlq_n_s16(arg_i16x8, 15);
+	vqshlq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlh_n_s16(arg_i16, 0);
+	vqshlh_n_s16(arg_i16, 15);
+	vqshlh_n_s16(arg_i16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlh_n_s16(arg_i16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s16(arg_i16x4, 0);
+	vqshlu_n_s16(arg_i16x4, 15);
+	vqshlu_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s16(arg_i16x8, 0);
+	vqshluq_n_s16(arg_i16x8, 15);
+	vqshluq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluh_n_s16(arg_i16, 0);
+	vqshluh_n_s16(arg_i16, 15);
+	vqshluh_n_s16(arg_i16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluh_n_s16(arg_i16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s32(int32x2_t arg_i32x2, int32_t arg_i32, int32x4_t arg_i32x4) {
+	vqshl_n_s32(arg_i32x2, 0);
+	vqshl_n_s32(arg_i32x2, 31);
+	vqshl_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s32(arg_i32x4, 0);
+	vqshlq_n_s32(arg_i32x4, 31);
+	vqshlq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshls_n_s32(arg_i32, 0);
+	vqshls_n_s32(arg_i32, 31);
+	vqshls_n_s32(arg_i32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshls_n_s32(arg_i32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s32(arg_i32x2, 0);
+	vqshlu_n_s32(arg_i32x2, 31);
+	vqshlu_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s32(arg_i32x4, 0);
+	vqshluq_n_s32(arg_i32x4, 31);
+	vqshluq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlus_n_s32(arg_i32, 0);
+	vqshlus_n_s32(arg_i32, 31);
+	vqshlus_n_s32(arg_i32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlus_n_s32(arg_i32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vqshl_n_s64(arg_i64x1, 0);
+	vqshl_n_s64(arg_i64x1, 63);
+	vqshl_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s64(arg_i64x2, 0);
+	vqshlq_n_s64(arg_i64x2, 63);
+	vqshlq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshld_n_s64(arg_i64, 0);
+	vqshld_n_s64(arg_i64, 63);
+	vqshld_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshld_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s64(arg_i64x1, 0);
+	vqshlu_n_s64(arg_i64x1, 63);
+	vqshlu_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s64(arg_i64x2, 0);
+	vqshluq_n_s64(arg_i64x2, 63);
+	vqshluq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlud_n_s64(arg_i64, 0);
+	vqshlud_n_s64(arg_i64, 63);
+	vqshlud_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlud_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u8(uint8x8_t arg_u8x8, uint8_t arg_u8, uint8x16_t arg_u8x16) {
+	vqshl_n_u8(arg_u8x8, 0);
+	vqshl_n_u8(arg_u8x8, 7);
+	vqshl_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u8(arg_u8x16, 0);
+	vqshlq_n_u8(arg_u8x16, 7);
+	vqshlq_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u8(arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlb_n_u8(arg_u8, 0);
+	vqshlb_n_u8(arg_u8, 7);
+	vqshlb_n_u8(arg_u8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlb_n_u8(arg_u8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u16(uint16_t arg_u16, uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vqshl_n_u16(arg_u16x4, 0);
+	vqshl_n_u16(arg_u16x4, 15);
+	vqshl_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u16(arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u16(arg_u16x8, 0);
+	vqshlq_n_u16(arg_u16x8, 15);
+	vqshlq_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u16(arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlh_n_u16(arg_u16, 0);
+	vqshlh_n_u16(arg_u16, 15);
+	vqshlh_n_u16(arg_u16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlh_n_u16(arg_u16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4, uint32_t arg_u32) {
+	vqshl_n_u32(arg_u32x2, 0);
+	vqshl_n_u32(arg_u32x2, 31);
+	vqshl_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u32(arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u32(arg_u32x4, 0);
+	vqshlq_n_u32(arg_u32x4, 31);
+	vqshlq_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u32(arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshls_n_u32(arg_u32, 0);
+	vqshls_n_u32(arg_u32, 31);
+	vqshls_n_u32(arg_u32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshls_n_u32(arg_u32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) {
+	vqshl_n_u64(arg_u64x1, 0);
+	vqshl_n_u64(arg_u64x1, 63);
+	vqshl_n_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u64(arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u64(arg_u64x2, 0);
+	vqshlq_n_u64(arg_u64x2, 63);
+	vqshlq_n_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u64(arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshld_n_u64(arg_u64, 0);
+	vqshld_n_u64(arg_u64, 63);
+	vqshld_n_u64(arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshld_n_u64(arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_widen_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vshll_n_s8(arg_i8x8, 0);
+	vshll_n_s8(arg_i8x8, 7);
+	vshll_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+
+	vshll_high_n_s8(arg_i8x16, 0);
+	vshll_high_n_s8(arg_i8x16, 7);
+	vshll_high_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vshll_n_s16(arg_i16x4, 0);
+	vshll_n_s16(arg_i16x4, 15);
+	vshll_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_s16(arg_i16x8, 0);
+	vshll_high_n_s16(arg_i16x8, 15);
+	vshll_high_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vshll_n_s32(arg_i32x2, 0);
+	vshll_n_s32(arg_i32x2, 31);
+	vshll_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_s32(arg_i32x4, 0);
+	vshll_high_n_s32(arg_i32x4, 31);
+	vshll_high_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vshll_n_u8(arg_u8x8, 0);
+	vshll_n_u8(arg_u8x8, 7);
+	vshll_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u8(arg_u8x16, 0);
+	vshll_high_n_u8(arg_u8x16, 7);
+	vshll_high_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vshll_n_u16(arg_u16x4, 0);
+	vshll_n_u16(arg_u16x4, 15);
+	vshll_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u16(arg_u16x8, 0);
+	vshll_high_n_u16(arg_u16x8, 15);
+	vshll_high_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vshll_n_u32(arg_u32x2, 0);
+	vshll_n_u32(arg_u32x2, 31);
+	vshll_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u32(arg_u32x4, 0);
+	vshll_high_n_u32(arg_u32x4, 31);
+	vshll_high_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_insert_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vsli_n_s8(arg_i8x8, arg_i8x8, 0);
+	vsli_n_s8(arg_i8x8, arg_i8x8, 7);
+	vsli_n_s8(arg_i8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s8(arg_i8x8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 0);
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 7);
+	vsliq_n_s8(arg_i8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
+	vsli_n_s16(arg_i16x4, arg_i16x4, 0);
+	vsli_n_s16(arg_i16x4, arg_i16x4, 15);
+	vsli_n_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s16(arg_i16x4, arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 0);
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 15);
+	vsliq_n_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vsli_n_s32(arg_i32x2, arg_i32x2, 0);
+	vsli_n_s32(arg_i32x2, arg_i32x2, 31);
+	vsli_n_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s32(arg_i32x2, arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 0);
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 31);
+	vsliq_n_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vsli_n_s64(arg_i64x1, arg_i64x1, 0);
+	vsli_n_s64(arg_i64x1, arg_i64x1, 63);
+	vsli_n_s64(arg_i64x1, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s64(arg_i64x1, arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 0);
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 63);
+	vsliq_n_s64(arg_i64x2, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vslid_n_s64(arg_i64, arg_i64, 0);
+	vslid_n_s64(arg_i64, arg_i64, 63);
+	vslid_n_s64(arg_i64, arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vslid_n_s64(arg_i64, arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vsli_n_u8(arg_u8x8, arg_u8x8, 0);
+	vsli_n_u8(arg_u8x8, arg_u8x8, 7);
+	vsli_n_u8(arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u8(arg_u8x8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 0);
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 7);
+	vsliq_n_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vsli_n_u16(arg_u16x4, arg_u16x4, 0);
+	vsli_n_u16(arg_u16x4, arg_u16x4, 15);
+	vsli_n_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u16(arg_u16x4, arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 0);
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 15);
+	vsliq_n_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) {
+	vsli_n_u32(arg_u32x2, arg_u32x2, 0);
+	vsli_n_u32(arg_u32x2, arg_u32x2, 31);
+	vsli_n_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u32(arg_u32x2, arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 0);
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 31);
+	vsliq_n_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) {
+	vsli_n_u64(arg_u64x1, arg_u64x1, 0);
+	vsli_n_u64(arg_u64x1, arg_u64x1, 63);
+	vsli_n_u64(arg_u64x1, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u64(arg_u64x1, arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 0);
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 63);
+	vsliq_n_u64(arg_u64x2, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vslid_n_u64(arg_u64, arg_u64, 0);
+	vslid_n_u64(arg_u64, arg_u64, 63);
+	vslid_n_u64(arg_u64, arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vslid_n_u64(arg_u64, arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) {
+	vsli_n_p64(arg_p64x1, arg_p64x1, 0);
+	vsli_n_p64(arg_p64x1, arg_p64x1, 63);
+	vsli_n_p64(arg_p64x1, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p64(arg_p64x1, arg_p64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 0);
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 63);
+	vsliq_n_p64(arg_p64x2, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vsli_n_p8(arg_p8x8, arg_p8x8, 0);
+	vsli_n_p8(arg_p8x8, arg_p8x8, 7);
+	vsli_n_p8(arg_p8x8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p8(arg_p8x8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 0);
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 7);
+	vsliq_n_p8(arg_p8x16, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) {
+	vsli_n_p16(arg_p16x4, arg_p16x4, 0);
+	vsli_n_p16(arg_p16x4, arg_p16x4, 15);
+	vsli_n_p16(arg_p16x4, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p16(arg_p16x4, arg_p16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 0);
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 15);
+	vsliq_n_p16(arg_p16x8, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
new file mode 100644
index 00000000000000..ad4677fe436660
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
@@ -0,0 +1,1083 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_shift_right_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // vshr_n_s8 / vshrq_n_s8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vshr_n_s8(arg_i8x8, 1);
+	vshr_n_s8(arg_i8x8, 8);
+	vshr_n_s8(arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s8(arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s8(arg_i8x16, 1);
+	vshrq_n_s8(arg_i8x16, 8);
+	vshrq_n_s8(arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s8(arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vshr_n_s16 / vshrq_n_s16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vshr_n_s16(arg_i16x4, 1);
+	vshr_n_s16(arg_i16x4, 16);
+	vshr_n_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s16(arg_i16x8, 1);
+	vshrq_n_s16(arg_i16x8, 16);
+	vshrq_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vshr_n_s32 / vshrq_n_s32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vshr_n_s32(arg_i32x2, 1);
+	vshr_n_s32(arg_i32x2, 32);
+	vshr_n_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s32(arg_i32x4, 1);
+	vshrq_n_s32(arg_i32x4, 32);
+	vshrq_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // vshr_n_s64 / vshrq_n_s64 / vshrd_n_s64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vshr_n_s64(arg_i64x1, 1);
+	vshr_n_s64(arg_i64x1, 64);
+	vshr_n_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s64(arg_i64x2, 1);
+	vshrq_n_s64(arg_i64x2, 64);
+	vshrq_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrd_n_s64(arg_i64, 1);
+	vshrd_n_s64(arg_i64, 64);
+	vshrd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrd_n_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // vshr_n_u8 / vshrq_n_u8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vshr_n_u8(arg_u8x8, 1);
+	vshr_n_u8(arg_u8x8, 8);
+	vshr_n_u8(arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u8(arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u8(arg_u8x16, 1);
+	vshrq_n_u8(arg_u8x16, 8);
+	vshrq_n_u8(arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u8(arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // vshr_n_u16 / vshrq_n_u16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vshr_n_u16(arg_u16x4, 1);
+	vshr_n_u16(arg_u16x4, 16);
+	vshr_n_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u16(arg_u16x8, 1);
+	vshrq_n_u16(arg_u16x8, 16);
+	vshrq_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u16(arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vshr_n_u32 / vshrq_n_u32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vshr_n_u32(arg_u32x2, 1);
+	vshr_n_u32(arg_u32x2, 32);
+	vshr_n_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u32(arg_u32x4, 1);
+	vshrq_n_u32(arg_u32x4, 32);
+	vshrq_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // vshr_n_u64 / vshrq_n_u64 / vshrd_n_u64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vshr_n_u64(arg_u64x1, 1);
+	vshr_n_u64(arg_u64x1, 64);
+	vshr_n_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u64(arg_u64x2, 1);
+	vshrq_n_u64(arg_u64x2, 64);
+	vshrq_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrd_n_u64(arg_u64, 1);
+	vshrd_n_u64(arg_u64, 64);
+	vshrd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrd_n_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // vrshr_n_s8 / vrshrq_n_s8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vrshr_n_s8(arg_i8x8, 1);
+	vrshr_n_s8(arg_i8x8, 8);
+	vrshr_n_s8(arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s8(arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s8(arg_i8x16, 1);
+	vrshrq_n_s8(arg_i8x16, 8);
+	vrshrq_n_s8(arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s8(arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vrshr_n_s16 / vrshrq_n_s16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vrshr_n_s16(arg_i16x4, 1);
+	vrshr_n_s16(arg_i16x4, 16);
+	vrshr_n_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s16(arg_i16x8, 1);
+	vrshrq_n_s16(arg_i16x8, 16);
+	vrshrq_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vrshr_n_s32 / vrshrq_n_s32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vrshr_n_s32(arg_i32x2, 1);
+	vrshr_n_s32(arg_i32x2, 32);
+	vrshr_n_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s32(arg_i32x4, 1);
+	vrshrq_n_s32(arg_i32x4, 32);
+	vrshrq_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // vrshr_n_s64 / vrshrq_n_s64 / vrshrd_n_s64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vrshr_n_s64(arg_i64x1, 1);
+	vrshr_n_s64(arg_i64x1, 64);
+	vrshr_n_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s64(arg_i64x2, 1);
+	vrshrq_n_s64(arg_i64x2, 64);
+	vrshrq_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrd_n_s64(arg_i64, 1);
+	vrshrd_n_s64(arg_i64, 64);
+	vrshrd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrd_n_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // vrshr_n_u8 / vrshrq_n_u8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vrshr_n_u8(arg_u8x8, 1);
+	vrshr_n_u8(arg_u8x8, 8);
+	vrshr_n_u8(arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u8(arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u8(arg_u8x16, 1);
+	vrshrq_n_u8(arg_u8x16, 8);
+	vrshrq_n_u8(arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u8(arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // vrshr_n_u16 / vrshrq_n_u16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vrshr_n_u16(arg_u16x4, 1);
+	vrshr_n_u16(arg_u16x4, 16);
+	vrshr_n_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u16(arg_u16x8, 1);
+	vrshrq_n_u16(arg_u16x8, 16);
+	vrshrq_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u16(arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vrshr_n_u32 / vrshrq_n_u32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vrshr_n_u32(arg_u32x2, 1);
+	vrshr_n_u32(arg_u32x2, 32);
+	vrshr_n_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u32(arg_u32x4, 1);
+	vrshrq_n_u32(arg_u32x4, 32);
+	vrshrq_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // vrshr_n_u64 / vrshrq_n_u64 / vrshrd_n_u64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vrshr_n_u64(arg_u64x1, 1);
+	vrshr_n_u64(arg_u64x1, 64);
+	vrshr_n_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u64(arg_u64x2, 1);
+	vrshrq_n_u64(arg_u64x2, 64);
+	vrshrq_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrd_n_u64(arg_u64, 1);
+	vrshrd_n_u64(arg_u64, 64);
+	vrshrd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrd_n_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // vsra_n_s8 / vsraq_n_s8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vsra_n_s8(arg_i8x8, arg_i8x8, 1);
+	vsra_n_s8(arg_i8x8, arg_i8x8, 8);
+	vsra_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vsra_n_s16 / vsraq_n_s16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vsra_n_s16(arg_i16x4, arg_i16x4, 1);
+	vsra_n_s16(arg_i16x4, arg_i16x4, 16);
+	vsra_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vsra_n_s32 / vsraq_n_s32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vsra_n_s32(arg_i32x2, arg_i32x2, 1);
+	vsra_n_s32(arg_i32x2, arg_i32x2, 32);
+	vsra_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // vsra_n_s64 / vsraq_n_s64 / vsrad_n_s64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vsra_n_s64(arg_i64x1, arg_i64x1, 1);
+	vsra_n_s64(arg_i64x1, arg_i64x1, 64);
+	vsra_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrad_n_s64(arg_i64, arg_i64, 1);
+	vsrad_n_s64(arg_i64, arg_i64, 64);
+	vsrad_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrad_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // vsra_n_u8 / vsraq_n_u8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vsra_n_u8(arg_u8x8, arg_u8x8, 1);
+	vsra_n_u8(arg_u8x8, arg_u8x8, 8);
+	vsra_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // vsra_n_u16 / vsraq_n_u16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vsra_n_u16(arg_u16x4, arg_u16x4, 1);
+	vsra_n_u16(arg_u16x4, arg_u16x4, 16);
+	vsra_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vsra_n_u32 / vsraq_n_u32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vsra_n_u32(arg_u32x2, arg_u32x2, 1);
+	vsra_n_u32(arg_u32x2, arg_u32x2, 32);
+	vsra_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // vsra_n_u64 / vsraq_n_u64 / vsrad_n_u64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vsra_n_u64(arg_u64x1, arg_u64x1, 1);
+	vsra_n_u64(arg_u64x1, arg_u64x1, 64);
+	vsra_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrad_n_u64(arg_u64, arg_u64, 1);
+	vsrad_n_u64(arg_u64, arg_u64, 64);
+	vsrad_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrad_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // vrsra_n_s8 / vrsraq_n_s8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 1);
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 8);
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vrsra_n_s16 / vrsraq_n_s16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 1);
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 16);
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vrsra_n_s32 / vrsraq_n_s32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 1);
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 32);
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // vrsra_n_s64 / vrsraq_n_s64 / vrsrad_n_s64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 1);
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 64);
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsrad_n_s64(arg_i64, arg_i64, 1);
+	vrsrad_n_s64(arg_i64, arg_i64, 64);
+	vrsrad_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsrad_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // vrsra_n_u8 / vrsraq_n_u8: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 1);
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 8);
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // vrsra_n_u16 / vrsraq_n_u16: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 1);
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 16);
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vrsra_n_u32 / vrsraq_n_u32: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 1);
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 32);
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // vrsra_n_u64 / vrsraq_n_u64 / vrsrad_n_u64: immediates 1 and 64 carry no diagnostic annotation; 0 and 65 are annotated as out-of-range errors
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 1);
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 64);
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsrad_n_u64(arg_u64, arg_u64, 1);
+	vrsrad_n_u64(arg_u64, arg_u64, 64);
+	vrsrad_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsrad_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s16(int16x8_t arg_i16x8, int8x8_t arg_i8x8) { // vshrn_n_s16 / vshrn_high_n_s16: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vshrn_n_s16(arg_i16x8, 1);
+	vshrn_n_s16(arg_i16x8, 8);
+	vshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s32(int32x4_t arg_i32x4, int16x4_t arg_i16x4) { // vshrn_n_s32 / vshrn_high_n_s32: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vshrn_n_s32(arg_i32x4, 1);
+	vshrn_n_s32(arg_i32x4, 16);
+	vshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s64(int32x2_t arg_i32x2, int64x2_t arg_i64x2) { // vshrn_n_s64 / vshrn_high_n_s64: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vshrn_n_s64(arg_i64x2, 1);
+	vshrn_n_s64(arg_i64x2, 32);
+	vshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint8x8_t arg_u8x8) { // vshrn_n_u16 / vshrn_high_n_u16: immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vshrn_n_u16(arg_u16x8, 1);
+	vshrn_n_u16(arg_u16x8, 8);
+	vshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint16x4_t arg_u16x4) { // vshrn_n_u32 / vshrn_high_n_u32: immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vshrn_n_u32(arg_u32x4, 1);
+	vshrn_n_u32(arg_u32x4, 16);
+	vshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2) { // vshrn_n_u64 / vshrn_high_n_u64: immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vshrn_n_u64(arg_u64x2, 1);
+	vshrn_n_u64(arg_u64x2, 32);
+	vshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s16(int16x8_t arg_i16x8, uint8x8_t arg_u8x8, int16_t arg_i16, int8x8_t arg_i8x8) { // vqshrun/vqshrn s16 forms (vector, scalar "h", and _high): immediates 1 and 8 carry no diagnostic annotation; 0 and 9 are annotated as out-of-range errors
+	vqshrun_n_s16(arg_i16x8, 1);
+	vqshrun_n_s16(arg_i16x8, 8);
+	vqshrun_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrunh_n_s16(arg_i16, 1);
+	vqshrunh_n_s16(arg_i16, 8);
+	vqshrunh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrunh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 1);
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 8);
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s16(arg_i16x8, 1);
+	vqshrn_n_s16(arg_i16x8, 8);
+	vqshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnh_n_s16(arg_i16, 1);
+	vqshrnh_n_s16(arg_i16, 8);
+	vqshrnh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s32(int16x4_t arg_i16x4, int32_t arg_i32, int32x4_t arg_i32x4, uint16x4_t arg_u16x4) { // vqshrun/vqshrn s32 forms (vector, scalar "s", and _high): immediates 1 and 16 carry no diagnostic annotation; 0 and 17 are annotated as out-of-range errors
+	vqshrun_n_s32(arg_i32x4, 1);
+	vqshrun_n_s32(arg_i32x4, 16);
+	vqshrun_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshruns_n_s32(arg_i32, 1);
+	vqshruns_n_s32(arg_i32, 16);
+	vqshruns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshruns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 1);
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 16);
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s32(arg_i32x4, 1);
+	vqshrn_n_s32(arg_i32x4, 16);
+	vqshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrns_n_s32(arg_i32, 1);
+	vqshrns_n_s32(arg_i32, 16);
+	vqshrns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s64(uint32x2_t arg_u32x2, int64x2_t arg_i64x2, int32x2_t arg_i32x2, int64_t arg_i64) { // vqshrun/vqshrn s64 forms (vector, scalar "d", and _high): immediates 1 and 32 carry no diagnostic annotation; 0 and 33 are annotated as out-of-range errors
+	vqshrun_n_s64(arg_i64x2, 1);
+	vqshrun_n_s64(arg_i64x2, 32);
+	vqshrun_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrund_n_s64(arg_i64, 1);
+	vqshrund_n_s64(arg_i64, 32);
+	vqshrund_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrund_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 1);
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 32);
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s64(arg_i64x2, 1);
+	vqshrn_n_s64(arg_i64x2, 32);
+	vqshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnd_n_s64(arg_i64, 1);
+	vqshrnd_n_s64(arg_i64, 32);
+	vqshrnd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnd_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint16_t arg_u16, uint8x8_t arg_u8x8) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vqshrn_n_u16(arg_u16x8, 1);
+	vqshrn_n_u16(arg_u16x8, 8);
+	vqshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar uint16_t operand.
+	vqshrnh_n_u16(arg_u16, 1);
+	vqshrnh_n_u16(arg_u16, 8);
+	vqshrnh_n_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnh_n_u16(arg_u16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint32_t arg_u32, uint16x4_t arg_u16x4) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vqshrn_n_u32(arg_u32x4, 1);
+	vqshrn_n_u32(arg_u32x4, 16);
+	vqshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar uint32_t operand.
+	vqshrns_n_u32(arg_u32, 1);
+	vqshrns_n_u32(arg_u32, 16);
+	vqshrns_n_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrns_n_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint64_t arg_u64) { // Immediate bounds checked: 1 and 32 accepted; 0 and 33 diagnosed.
+	vqshrn_n_u64(arg_u64x2, 1);
+	vqshrn_n_u64(arg_u64x2, 32);
+	vqshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar uint64_t operand.
+	vqshrnd_n_u64(arg_u64, 1);
+	vqshrnd_n_u64(arg_u64, 32);
+	vqshrnd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnd_n_u64(arg_u64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s16(int16x8_t arg_i16x8, uint8x8_t arg_u8x8,
+																int16_t arg_i16, int8x8_t arg_i8x8) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vqrshrun_n_s16(arg_i16x8, 1);
+	vqrshrun_n_s16(arg_i16x8, 8);
+	vqrshrun_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar int16_t operand.
+	vqrshrunh_n_s16(arg_i16, 1);
+	vqrshrunh_n_s16(arg_i16, 8);
+	vqrshrunh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrunh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 1);
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 8);
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vqrshrn variants, in the same vector / scalar / _high order.
+	vqrshrn_n_s16(arg_i16x8, 1);
+	vqrshrn_n_s16(arg_i16x8, 8);
+	vqrshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar int16_t operand.
+	vqrshrnh_n_s16(arg_i16, 1);
+	vqrshrnh_n_s16(arg_i16, 8);
+	vqrshrnh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s32(int16x4_t arg_i16x4, int32_t arg_i32,
+																 int32x4_t arg_i32x4, uint16x4_t arg_u16x4) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vqrshrun_n_s32(arg_i32x4, 1);
+	vqrshrun_n_s32(arg_i32x4, 16);
+	vqrshrun_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar int32_t operand.
+	vqrshruns_n_s32(arg_i32, 1);
+	vqrshruns_n_s32(arg_i32, 16);
+	vqrshruns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshruns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 1);
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 16);
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vqrshrn variants, in the same vector / scalar / _high order.
+	vqrshrn_n_s32(arg_i32x4, 1);
+	vqrshrn_n_s32(arg_i32x4, 16);
+	vqrshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar int32_t operand.
+	vqrshrns_n_s32(arg_i32, 1);
+	vqrshrns_n_s32(arg_i32, 16);
+	vqrshrns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s64(uint32x2_t arg_u32x2, int64x2_t arg_i64x2,
+																int32x2_t arg_i32x2, int64_t arg_i64) { // Immediate bounds checked: 1 and 32 accepted; 0 and 33 diagnosed.
+	vqrshrun_n_s64(arg_i64x2, 1);
+	vqrshrun_n_s64(arg_i64x2, 32);
+	vqrshrun_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar int64_t operand.
+	vqrshrund_n_s64(arg_i64, 1);
+	vqrshrund_n_s64(arg_i64, 32);
+	vqrshrund_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrund_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 1);
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 32);
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// vqrshrn variants, in the same vector / scalar / _high order.
+	vqrshrn_n_s64(arg_i64x2, 1);
+	vqrshrn_n_s64(arg_i64x2, 32);
+	vqrshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar int64_t operand.
+	vqrshrnd_n_s64(arg_i64, 1);
+	vqrshrnd_n_s64(arg_i64, 32);
+	vqrshrnd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnd_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint16_t arg_u16,
+																uint8x8_t arg_u8x8) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vqrshrn_n_u16(arg_u16x8, 1);
+	vqrshrn_n_u16(arg_u16x8, 8);
+	vqrshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar uint16_t operand.
+	vqrshrnh_n_u16(arg_u16, 1);
+	vqrshrnh_n_u16(arg_u16, 8);
+	vqrshrnh_n_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnh_n_u16(arg_u16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint32_t arg_u32,
+																uint16x4_t arg_u16x4) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vqrshrn_n_u32(arg_u32x4, 1);
+	vqrshrn_n_u32(arg_u32x4, 16);
+	vqrshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar uint32_t operand.
+	vqrshrns_n_u32(arg_u32, 1);
+	vqrshrns_n_u32(arg_u32, 16);
+	vqrshrns_n_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrns_n_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2,
+																uint64_t arg_u64) { // Immediate bounds checked: 1 and 32 accepted; 0 and 33 diagnosed.
+	vqrshrn_n_u64(arg_u64x2, 1);
+	vqrshrn_n_u64(arg_u64x2, 32);
+	vqrshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar uint64_t operand.
+	vqrshrnd_n_u64(arg_u64, 1);
+	vqrshrnd_n_u64(arg_u64, 32);
+	vqrshrnd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnd_n_u64(arg_u64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s16(int16x8_t arg_i16x8, int8x8_t arg_i8x8) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vrshrn_n_s16(arg_i16x8, 1);
+	vrshrn_n_s16(arg_i16x8, 8);
+	vrshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s32(int32x4_t arg_i32x4, int16x4_t arg_i16x4) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vrshrn_n_s32(arg_i32x4, 1);
+	vrshrn_n_s32(arg_i32x4, 16);
+	vrshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s64(int32x2_t arg_i32x2, int64x2_t arg_i64x2) { // Immediate bounds checked: 1 and 32 accepted; 0 and 33 diagnosed.
+	vrshrn_n_s64(arg_i64x2, 1);
+	vrshrn_n_s64(arg_i64x2, 32);
+	vrshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint8x8_t arg_u8x8) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vrshrn_n_u16(arg_u16x8, 1);
+	vrshrn_n_u16(arg_u16x8, 8);
+	vrshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint16x4_t arg_u16x4) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vrshrn_n_u32(arg_u32x4, 1);
+	vrshrn_n_u32(arg_u32x4, 16);
+	vrshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2) { // Immediate bounds checked: 1 and 32 accepted; 0 and 33 diagnosed.
+	vrshrn_n_u64(arg_u64x2, 1);
+	vrshrn_n_u64(arg_u64x2, 32);
+	vrshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// _high form; the immediate is the third argument.
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vsri_n_s8(arg_i8x8, arg_i8x8, 1);
+	vsri_n_s8(arg_i8x8, arg_i8x8, 8);
+	vsri_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking int8x16_t operands.
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vsri_n_s16(arg_i16x4, arg_i16x4, 1);
+	vsri_n_s16(arg_i16x4, arg_i16x4, 16);
+	vsri_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking int16x8_t operands.
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Immediate bounds checked: 1 and 32 accepted; 0 and 33 diagnosed.
+	vsri_n_s32(arg_i32x2, arg_i32x2, 1);
+	vsri_n_s32(arg_i32x2, arg_i32x2, 32);
+	vsri_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking int32x4_t operands.
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Immediate bounds checked: 1 and 64 accepted; 0 and 65 diagnosed.
+	vsri_n_s64(arg_i64x1, arg_i64x1, 1);
+	vsri_n_s64(arg_i64x1, arg_i64x1, 64);
+	vsri_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking int64x2_t operands.
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar int64_t operands.
+	vsrid_n_s64(arg_i64, arg_i64, 1);
+	vsrid_n_s64(arg_i64, arg_i64, 64);
+	vsrid_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrid_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vsri_n_u8(arg_u8x8, arg_u8x8, 1);
+	vsri_n_u8(arg_u8x8, arg_u8x8, 8);
+	vsri_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking uint8x16_t operands.
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vsri_n_u16(arg_u16x4, arg_u16x4, 1);
+	vsri_n_u16(arg_u16x4, arg_u16x4, 16);
+	vsri_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking uint16x8_t operands.
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Immediate bounds checked: 1 and 32 accepted; 0 and 33 diagnosed.
+	vsri_n_u32(arg_u32x2, arg_u32x2, 1);
+	vsri_n_u32(arg_u32x2, arg_u32x2, 32);
+	vsri_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking uint32x4_t operands.
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Immediate bounds checked: 1 and 64 accepted; 0 and 65 diagnosed.
+	vsri_n_u64(arg_u64x1, arg_u64x1, 1);
+	vsri_n_u64(arg_u64x1, arg_u64x1, 64);
+	vsri_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking uint64x2_t operands.
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// Scalar uint64_t operands.
+	vsrid_n_u64(arg_u64, arg_u64, 1);
+	vsrid_n_u64(arg_u64, arg_u64, 64);
+	vsrid_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrid_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) { // Immediate bounds checked: 1 and 64 accepted; 0 and 65 diagnosed.
+	vsri_n_p64(arg_p64x1, arg_p64x1, 1);
+	vsri_n_p64(arg_p64x1, arg_p64x1, 64);
+	vsri_n_p64(arg_p64x1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p64(arg_p64x1, arg_p64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking poly64x2_t operands.
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 1);
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 64);
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) { // Immediate bounds checked: 1 and 8 accepted; 0 and 9 diagnosed.
+	vsri_n_p8(arg_p8x8, arg_p8x8, 1);
+	vsri_n_p8(arg_p8x8, arg_p8x8, 8);
+	vsri_n_p8(arg_p8x8, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p8(arg_p8x8, arg_p8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking poly8x16_t operands.
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 1);
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 8);
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) { // Immediate bounds checked: 1 and 16 accepted; 0 and 17 diagnosed.
+	vsri_n_p16(arg_p16x4, arg_p16x4, 1);
+	vsri_n_p16(arg_p16x4, arg_p16x4, 16);
+	vsri_n_p16(arg_p16x4, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p16(arg_p16x4, arg_p16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// q form, taking poly16x8_t operands.
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 1);
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 16);
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
new file mode 100644
index 00000000000000..69469fd0d09cdd
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
@@ -0,0 +1,448 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// s8, s16, s32, and s64 variants are tested under 
+// clang/test/Sema/arm-neon-ranges.c 
+
+void test_store_u8(uint8_t* arg_u8_ptr, uint8x8x2_t arg_u8x8x2, uint8x16_t arg_u8x16,
+				   uint8x16x2_t arg_u8x16x2, uint8x16x4_t arg_u8x16x4, uint8x8_t arg_u8x8,
+				   uint8x16x3_t arg_u8x16x3, uint8x8x4_t arg_u8x8x4, uint8x8x3_t arg_u8x8x3) {
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 0);
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 7);
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 0);
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 15);
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 0);
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 7);
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 0);
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 7);
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 0);
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 7);
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 0);
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 15);
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 0);
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 15);
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 0);
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 15);
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_u16(uint16_t* arg_u16_ptr, uint16x8x2_t arg_u16x8x2, uint16x8x3_t arg_u16x8x3,
+					uint16x4x3_t arg_u16x4x3, uint16x4x2_t arg_u16x4x2, uint16x4x4_t arg_u16x4x4,
+					uint16x8x4_t arg_u16x8x4, uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 0);
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 3);
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 0);
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 7);
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 0);
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 3);
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 0);
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 7);
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 0);
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 3);
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 0);
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 7);
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 0);
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 3);
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 0);
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 7);
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_u32(uint32x2x3_t arg_u32x2x3, uint32_t* arg_u32_ptr, uint32x4x3_t arg_u32x4x3,
+					uint32x4_t arg_u32x4, uint32x2x4_t arg_u32x2x4, uint32x4x4_t arg_u32x4x4,
+					uint32x2_t arg_u32x2, uint32x2x2_t arg_u32x2x2, uint32x4x2_t arg_u32x4x2) {
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 0);
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 1);
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 0);
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 3);
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 0);
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 1);
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 0);
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 3);
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 0);
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 1);
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 0);
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 3);
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 0);
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 1);
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 0);
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 3);
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2, uint64x2x4_t arg_u64x2x4,
+					uint64x1x4_t arg_u64x1x4, uint64_t* arg_u64_ptr, uint64x1x3_t arg_u64x1x3,
+					uint64x2x2_t arg_u64x2x2, uint64x2x3_t arg_u64x2x3, uint64x1x2_t arg_u64x1x2) {
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, 0);
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 0);
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 1);
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, 0);
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 0);
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 1);
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, 0);
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 0);
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 1);
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_p64(poly64_t* arg_p64_ptr, poly64x1x4_t arg_p64x1x4, poly64x2x3_t arg_p64x2x3,
+					poly64x1x2_t arg_p64x1x2, poly64x2x4_t arg_p64x2x4, poly64x2_t arg_p64x2,
+					poly64x1x3_t arg_p64x1x3, poly64x1_t arg_p64x1, poly64x2x2_t arg_p64x2x2) {
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, 0);
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 0);
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 1);
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, 0);
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 0);
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 1);
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, 0);
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 0);
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 1);
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_f16(float16x4x2_t arg_f16x4x2, float16x8x3_t arg_f16x8x3, float16x8_t arg_f16x8,
+					float16x4_t arg_f16x4, float16x4x3_t arg_f16x4x3, float16x8x4_t arg_f16x8x4,
+					float16x8x2_t arg_f16x8x2, float16_t* arg_f16_ptr, float16x4x4_t arg_f16x4x4) {
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 0);
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 3);
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 0);
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 7);
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 0);
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 3);
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 0);
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 7);
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 0);
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 3);
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 0);
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 7);
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 0);
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 3);
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 0);
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 7);
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_f32(float32x2x4_t arg_f32x2x4, float32x4x4_t arg_f32x4x4, float32x4x3_t arg_f32x4x3,
+					float32x4x2_t arg_f32x4x2, float32_t* arg_f32_ptr, float32x4_t arg_f32x4,
+					float32x2_t arg_f32x2, float32x2x2_t arg_f32x2x2, float32x2x3_t arg_f32x2x3) {
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 0);
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 1);
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 0);
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 3);
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 0);
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 1);
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 0);
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 3);
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 0);
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 1);
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 0);
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 3);
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 0);
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 1);
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 0);
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 3);
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_p8(poly8x16_t arg_p8x16, poly8x16x4_t arg_p8x16x4, poly8x8_t arg_p8x8,
+				   poly8x16x2_t arg_p8x16x2, poly8x8x4_t arg_p8x8x4, poly8x16x3_t arg_p8x16x3,
+				   poly8_t* arg_p8_ptr, poly8x8x3_t arg_p8x8x3, poly8x8x2_t arg_p8x8x2) {
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 0);
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 7);
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 0);
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 15);
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 0);
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 7);
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 0);
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 7);
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 0);
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 7);
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 0);
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 15);
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 0);
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 15);
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 0);
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 15);
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_p16(poly16x4_t arg_p16x4, poly16x4x3_t arg_p16x4x3, poly16x4x2_t arg_p16x4x2,
+					poly16x8x2_t arg_p16x8x2, poly16x8x3_t arg_p16x8x3, poly16_t* arg_p16_ptr,
+					poly16x8x4_t arg_p16x8x4, poly16x8_t arg_p16x8, poly16x4x4_t arg_p16x4x4) {
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 0);
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 3);
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 0);
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 7);
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 0);
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 3);
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 0);
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 7);
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 0);
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 3);
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 0);
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 7);
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 0);
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 3);
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 0);
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 7);
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_f64(float64x1x2_t arg_f64x1x2, float64x1_t arg_f64x1, float64x2x2_t arg_f64x2x2,
+					float64x1x3_t arg_f64x1x3, float64x2x3_t arg_f64x2x3, float64x2_t arg_f64x2,
+					float64_t* arg_f64_ptr, float64x1x4_t arg_f64x1x4, float64x2x4_t arg_f64x2x4) {
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, 0);
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	// Differs from current inaccurate ACLE spec -----
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 0);
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 1);
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// ----------------------------
+	
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 0);
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 0);
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 1);
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, 0);
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 0);
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 1);
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+

>From 6e00d04ee67dc4e5f6eb93c8c7e19b064d777c64 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 10:23:39 +0000
Subject: [PATCH 07/13] Remove redundant immrange flags from NEON instructions

- Changes
	- clang/include/clang/Basic/arm_immcheck_incl.td
	- clang/include/clang/Basic/arm_neon.td
		- Restore immediate range of vcmla{_ROT}_laneq_f16 intrinsics to [0..3]
	- clang/include/clang/Basic/arm_fp16.td
	- clang/include/clang/Basic/arm_neon_incl.td
	- clang/utils/TableGen/NeonEmitter.cpp
		- The flags 'isVXAR', 'isVCVT_N', and 'isLaneQ' serve no justifiable purpose
	          following the work of this patch, and so are removed from NEON instructions.

	- clang/include/clang/Basic/arm_immcheck_incl.td
	- clang/utils/TableGen/SveEmitter.cpp
		- The fields of the ImmCheck tablegen class are also renamed to better explain
		  their purposes.
		- The description of ImmCheckTypes is updated to reflect that we are now sharing
		  them with NEON.
	- clang/test/CodeGen/aarch64-neon-vcmla.c
		- Revert file to main following resolution of vcmla{_ROT}_laneq_f16 range
---
 clang/include/clang/Basic/arm_fp16.td         |   5 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |  12 +-
 clang/include/clang/Basic/arm_neon.td         | 271 ++------
 clang/include/clang/Basic/arm_neon_incl.td    |   6 -
 clang/test/CodeGen/aarch64-neon-vcmla.c       | 624 +++++++-----------
 clang/utils/TableGen/NeonEmitter.cpp          |   9 +-
 clang/utils/TableGen/SveEmitter.cpp           |  14 +-
 7 files changed, 323 insertions(+), 618 deletions(-)

diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index 81d257fc73033e..ed26e84af075ed 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -76,12 +76,13 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "(1U)1", "Sh">;
   def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
-  let isVCVT_N = 1, ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
+  let ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
     def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
     def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
     def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
-    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">;
   }
+    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
     def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh",
                                 [ImmCheck<1, ImmCheckCvt, 0>]>;
     def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh",
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
index 88440532799839..c6a7ca26855496 100644
--- a/clang/include/clang/Basic/arm_immcheck_incl.td
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -10,10 +10,10 @@ def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
 def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
 def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
 def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
-def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(sizeinbits(vec)/(sizeinbits(elt)) - 1)
 def ImmCheckCvt                 : ImmCheckType<8>;  // 1..sizeinbits(elt) (same as ShiftRight)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(sizeinbits(vec)/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(sizeinbits(vec)/(4*sizeinbits(elt)) - 1)
 def ImmCheckComplexRot90_270    : ImmCheckType<11>; // [90,270]
 def ImmCheckComplexRotAll90     : ImmCheckType<12>; // [0, 90, 180,270]
 def ImmCheck0_13                : ImmCheckType<13>; // 0..13
@@ -31,9 +31,9 @@ def ImmCheck1_32                : ImmCheckType<24>; // 1..32
 def ImmCheck1_64                : ImmCheckType<25>; // 1..64
 def ImmCheck0_63                : ImmCheckType<26>; // 0..63
 
-class ImmCheck<int arg, ImmCheckType kind, int typeArg = -1> {
-  int Arg = arg;
+class ImmCheck<int immArgIdx, ImmCheckType kind, int typeArgIdx = -1> {
+  int ImmArgIdx = immArgIdx;
   // The index of the argument whose type should be referred to when validating this immediate.
-  int TypeContextArg = typeArg;
+  int TypeContextArgIdx = typeArgIdx;
   ImmCheckType Kind = kind;
 }
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index c77b428c3e92b5..89964ad0f4bc39 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -288,16 +288,13 @@ def SPLAT  : WInst<"splat_lane", ".(!q)I",
                     [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
                    "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
-                   [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-  let isLaneQ = 1;
-}
+                   [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+
 let TargetGuard = "bf16,neon" in {
   def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb", 
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", 
-                      [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-    let isLaneQ = 1;
-  }
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -612,14 +609,12 @@ let ArchGuard = "(__ARM_FP & 2)" in {
 def VCVT_S32     : SInst<"vcvt_s32", "S.",  "fQf">;
 def VCVT_U32     : SInst<"vcvt_u32", "U.",  "fQf">;
 def VCVT_F32     : SInst<"vcvt_f32", "F(.!)",  "iUiQiQUi">;
-let isVCVT_N = 1 in {
 def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf", 
                         [ImmCheck<1, ImmCheck1_32>]>;
 def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf", 
                         [ImmCheck<1, ImmCheck1_32>]>;
 def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi", 
                         [ImmCheck<1, ImmCheck1_32>]>;
-}
 
 def VMOVN        : IInst<"vmovn", "<Q",  "silUsUiUl">;
 def VMOVL        : SInst<"vmovl", "(>Q).",  "csiUcUsUi">;
@@ -990,14 +985,12 @@ def QRSHRN_HIGH_N  : SOpInst<"vqrshrn_high_n", "<(<q).I",
 // Converting vectors
 def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
-let isVCVT_N = 1 in {
 def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl", 
                         [ImmCheck<1, ImmCheck1_64>]>;
 def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd", 
                         [ImmCheck<1, ImmCheck1_64>]>;
 def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd", 
                         [ImmCheck<1, ImmCheck1_64>]>;
-}
 
 ////////////////////////////////////////////////////////////////////////////////
 // 3VDiff class using high 64-bit in operands
@@ -1044,22 +1037,16 @@ def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
 def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
                         "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
 def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI",
-                     "csilPcPsPlUcUsUiUlfd", OP_COPY_LN> {
-  let isLaneQ = 1;
-}
+                     "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>;
 def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
-                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN> {
-  let isLaneQ = 1;
-}
+                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Set all lanes to same value
 def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
 def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
                   "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
-                        OP_DUP_LN> {
-  let isLaneQ = 1;
-}
+                        OP_DUP_LN>;
 def DUP_N   : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>;
 def MOV_N   : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>;
 
@@ -1075,62 +1062,36 @@ def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> {
 ////////////////////////////////////////////////////////////////////////////////
 
 def VMLA_LANEQ   : IOpInst<"vmla_laneq", "...QI",
-                           "siUsUifQsQiQUsQUiQf", OP_MLA_LN> {
-  let isLaneQ = 1;
-}
+                           "siUsUifQsQiQUsQUiQf", OP_MLA_LN>;
 def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
-                           "siUsUifQsQiQUsQUiQf", OP_MLS_LN> {
-  let isLaneQ = 1;
-}
-
+                           "siUsUifQsQiQUsQUiQf", OP_MLS_LN>;
 def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd", 
                         [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd", 
-                        [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
-def VFMS_LANEQ   : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ> {
-  let isLaneQ = 1;
-}
+def VFMS_LANEQ   : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>;
 
-def VMLAL_LANEQ  : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN> {
-  let isLaneQ = 1;
-}
+def VMLAL_LANEQ  : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN>;
 def VMLAL_HIGH_LANE   : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                                 OP_MLALHi_LN>;
 def VMLAL_HIGH_LANEQ  : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
-                                OP_MLALHi_LN> {
-  let isLaneQ = 1;
-}
-def VMLSL_LANEQ  : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MLALHi_LN>;
+def VMLSL_LANEQ  : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN>;
 def VMLSL_HIGH_LANE   : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                                 OP_MLSLHi_LN>;
 def VMLSL_HIGH_LANEQ  : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
-                                OP_MLSLHi_LN> {
-  let isLaneQ = 1;
-}
-
-def VQDMLAL_LANEQ  : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MLSLHi_LN>;
+def VQDMLAL_LANEQ  : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN>;
 def VQDMLAL_HIGH_LANE   : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si",
                                 OP_QDMLALHi_LN>;
 def VQDMLAL_HIGH_LANEQ  : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si",
-                                OP_QDMLALHi_LN> {
-  let isLaneQ = 1;
-}
-def VQDMLSL_LANEQ  : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_QDMLALHi_LN>;
+def VQDMLSL_LANEQ  : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN>;
 def VQDMLSL_HIGH_LANE   : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si",
                                 OP_QDMLSLHi_LN>;
 def VQDMLSL_HIGH_LANEQ  : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si",
-                                OP_QDMLSLHi_LN> {
-  let isLaneQ = 1;
-}
+                                OP_QDMLSLHi_LN>;
 
 // Newly add double parameter for vmul_lane in aarch64
 // Note: d type is handled by SCALAR_VMUL_LANE
@@ -1138,50 +1099,31 @@ def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>;
 
 // Note: d type is handled by SCALAR_VMUL_LANEQ
 def VMUL_LANEQ   : IOpInst<"vmul_laneq", "..QI",
-                           "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN> {
-  let isLaneQ = 1;
-}
-def VMULL_LANEQ  : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN> {
-  let isLaneQ = 1;
-}
+                           "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>;
+def VMULL_LANEQ  : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN>;
 def VMULL_HIGH_LANE   : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi",
                                 OP_MULLHi_LN>;
 def VMULL_HIGH_LANEQ  : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi",
-                                OP_MULLHi_LN> {
-  let isLaneQ = 1;
-}
-
-def VQDMULL_LANEQ  : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MULLHi_LN>;
+def VQDMULL_LANEQ  : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN>;
 def VQDMULL_HIGH_LANE   : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si",
                                   OP_QDMULLHi_LN>;
 def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
-                                  OP_QDMULLHi_LN> {
-  let isLaneQ = 1;
-}
-
-let isLaneQ = 1 in {
+                                  OP_QDMULLHi_LN>;
 def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-}
+
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
-def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
-  let isLaneQ = 1;
-}
-def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> {
-  let isLaneQ = 1;
-}
+def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN>;
+def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN>;
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a"
 
 // Note: d type implemented by SCALAR_VMULX_LANE
 def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>;
 // Note: d type is implemented by SCALAR_VMULX_LANEQ
-def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN> {
-  let isLaneQ = 1;
-}
+def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Across vectors class
@@ -1224,11 +1166,8 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "s
 def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def RAX1 : SInst<"vrax1", "...", "QUl">;
-
-let isVXAR = 1 in {
 def XAR :  SInst<"vxar", "...I", "QUl", [ImmCheck<2, ImmCheck0_63>]>;
 }
-}
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3,neon" in {
 def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
@@ -1658,15 +1597,11 @@ def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">;
 
 // Scalar Floating Point  multiply (scalar, by element)
 def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>;
-def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN>;
 
 // Scalar Floating Point  multiply extended (scalar, by element)
 def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>;
-def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN>;
 
 def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
@@ -1676,86 +1611,58 @@ def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d",
 
 // VMUL_LANEQ d type implemented using scalar mul lane
 def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
-                              [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
-  let isLaneQ = 1;
-}
-
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 // VMULX_LANE d type implemented using scalar vmulx_lane
 def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
 
 // VMULX_LANEQ d type implemented using scalar vmulx_laneq
-def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>;
 // Scalar Floating Point fused multiply-add (scalar, by element)
 def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
-                            [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Scalar Floating Point fused multiply-subtract (scalar, by element)
 def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>;
-def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ>;
 
 // Signed Saturating Doubling Multiply Long (scalar by element)
 def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>;
-def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>;
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
 def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
-                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
 def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
                               [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
-                              [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
-
+                              [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 // Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
 def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>;
-def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN>;
 
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
 def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>;
-def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN>;
 
 let TargetGuard = "v8.1a,neon" in {
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
-def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN>;
 // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
 def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>;
-def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
 } // TargetGuard = "v8.1a"
 
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
-                            [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-  let isLaneQ = 1;
-}
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
 
@@ -1825,14 +1732,12 @@ let TargetGuard = "fullfp16,neon" in {
     def VCLTH      : SOpInst<"vclt", "U..", "hQh", OP_LT>;
 
   // Vector conversion
-  let isVCVT_N = 1 in {
     def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs", 
                           [ImmCheck<1, ImmCheck1_16>]>;
     def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh", 
                           [ImmCheck<1, ImmCheck1_16>]>;
     def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh", 
                           [ImmCheck<1, ImmCheck1_16>]>;
-  }
 
   // Max/Min
   def VMAXH         : SInst<"vmax", "...", "hQh">;
@@ -1913,9 +1818,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
-                          [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-    let isLaneQ = 1;
-  }
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMA lane with scalar argument
   def FMLA_NH      : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
@@ -1923,47 +1826,31 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
-                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-    let isLaneQ = 1;
-  }
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMS lane
   def VFMS_LANEH   : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>;
-  def VFMS_LANEQH  : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ> {
-    let isLaneQ = 1;
-  }
+  def VFMS_LANEQH  : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ>;
   // FMS lane with scalar argument
   def FMLS_NH      : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>;
   // Scalar floating point fused multiply-subtract (scalar, by element)
   def SCALAR_FMLS_LANEH  : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>;
-  def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ> {
-    let isLaneQ = 1;
-  }
-
+  def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ>;
   // Mul lane
-  def VMUL_LANEQH   : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN> {
-    let isLaneQ = 1;
-  }
+  def VMUL_LANEQH   : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN>;
   // Scalar floating point  multiply (scalar, by element)
   def SCALAR_FMUL_LANEH  : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>;
-  def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN> {
-    let isLaneQ = 1;
-  }
+  def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN>;
 
   // Mulx lane
   def VMULX_LANEH   : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>;
-  def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN> {
-    let isLaneQ = 1;
-  }
+  def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>;
   def VMULX_NH      : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
   // Scalar floating point  mulx (scalar, by element)
   def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
-                                [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
-    let isLaneQ = 1;
-  }
-
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   // ARMv8.2-A FP16 reduction vector intrinsics.
   def VMAXVH   : SInst<"vmaxv", "1.", "hQh">;
   def VMINVH   : SInst<"vminv", "1.", "hQh">;
@@ -1983,9 +1870,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh", 
                                 [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh", 
-                                [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-    let isLaneQ = 1;
-  }
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
 // v8.2-A dot product instructions.
@@ -1995,9 +1880,7 @@ let TargetGuard = "dotprod,neon" in {
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod,neon" in {
   // Variants indexing into a 128-bit vector are A64 only.
-  def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ> {
-    let isLaneQ = 1;
-  }
+  def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ>;
 }
 
 // v8.2-A FP16 fused multiply-add long instructions.
@@ -2012,18 +1895,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>;
   def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>;
 
-  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN> {
-    let isLaneQ = 1;
-  }
-  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN> {
-    let isLaneQ = 1;
-  }
-  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi> {
-    let isLaneQ = 1;
-  }
-  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi> {
-    let isLaneQ = 1;
-  }
+  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN>;
+  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN>;
+  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>;
+  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>;
 }
 
 let TargetGuard = "i8mm,neon" in {
@@ -2036,19 +1911,15 @@ let TargetGuard = "i8mm,neon" in {
   def VSUDOT_LANE  : SOpInst<"vsudot_lane", "..(<<)(<<qU)I", "iQi", OP_SUDOT_LN>;
 
   let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-    let isLaneQ = 1 in {
-      def VUSDOT_LANEQ  : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
-      def VSUDOT_LANEQ  : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
-    }
+    def VUSDOT_LANEQ  : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
+    def VSUDOT_LANEQ  : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
   }
 }
 
 let TargetGuard = "bf16,neon" in {
   def VDOT_BF : SInst<"vbfdot", "..BB", "fQf">;
   def VDOT_LANE_BF : SOpInst<"vbfdot_lane", "..B(Bq)I", "fQf", OP_BFDOT_LN>;
-  def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ> {
-    let isLaneQ = 1;
-  }
+  def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ>;
 
   def VFMMLA_BF : SInst<"vbfmmla", "..BB", "Qf">;
 
@@ -2074,17 +1945,13 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
 
-    let isLaneQ = 1 in  {
-      // ACLE specifies that the f16 variant of vcmla_#ROT_laneq has an immediate range 0<=lane<=1,
-      // whereas the f16 variant of vcmlaq_#ROT_laneq has an immediate range 0<=lane<=3.
-      // f16 is the only type for which these two differ.
-      defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
-      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
-                (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>;
-      // vcmlaq{ROT}_laneq
-      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
-             (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-    }
+    // vcmla_{ROT}_laneq
+    def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
+            (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
+    // vcmlaq{ROT}_laneq
+    def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
+            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
+
   }
 }
 
@@ -2121,9 +1988,7 @@ let TargetGuard = "bf16,neon" in {
   def VDUP_N_BF    : WOpInst<"vdup_n", ".1", "bQb", OP_DUP>;
 
   def VDUP_LANE_BF : WOpInst<"vdup_lane", ".qI", "bQb", OP_DUP_LN>;
-  def VDUP_LANEQ_BF: WOpInst<"vdup_laneq", ".QI", "bQb", OP_DUP_LN> {
-    let isLaneQ = 1;
-  }
+  def VDUP_LANEQ_BF: WOpInst<"vdup_laneq", ".QI", "bQb", OP_DUP_LN>;
 
   def VCOMBINE_BF  : NoTestOpInst<"vcombine", "Q..", "b", OP_CONC>;
 
@@ -2137,9 +2002,7 @@ let TargetGuard = "bf16,neon" in {
   def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
-                          [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-    let isLaneQ = 1;
-  }
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
   def VLD1_BF : WInst<"vld1", ".(c*!)", "bQb">;
   def VLD2_BF : WInst<"vld2", "2(c*!)", "bQb">;
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index 2b5acd41e7bbd4..b088e0794cdea3 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -274,12 +274,6 @@ class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
   bit isShift = 0;
   bit isScalarShift = 0;
   bit isScalarNarrowShift = 0;
-  bit isVCVT_N = 0;
-  bit isVXAR = 0;
-  // For immediate checks: the immediate will be assumed to specify the lane of
-  // a Q register. Only used for intrinsics which end up calling polymorphic
-  // builtins.
-  bit isLaneQ = 0;
   list<ImmCheck> ImmChecks = ch;
 
   // Certain intrinsics have different names than their representative
diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index d82d74d019c012..02171527cc6a32 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -1,596 +1,444 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon  \
-// RUN:            -target-feature +v8.3a -target-feature +fullfp16 \
-// RUN:            -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon \
+// RUN:        -target-feature +v8.3a \
+// RUN:        -target-feature +fullfp16 \
+// RUN:        -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
 
 // REQUIRES: aarch64-registered-target
 
 #include <arm_neon.h>
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT90_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot90_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT180_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot180_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT270_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot270_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_laneq_f16(acc, lhs, rhs, 1);
+  return vcmla_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot90_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
+  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
+  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
+  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1);
 }
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index c790994665accd..eee5b95a601ab0 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -413,11 +413,11 @@ class Intrinsic {
     for (const auto *I : ImmCheckList) {
       unsigned EltSizeInBits = 0, VecSizeInBits = 0;
 
-      ArgIdx = I->getValueAsInt("Arg");
-      TypeArgIdx = I->getValueAsInt("TypeContextArg");
+      ArgIdx = I->getValueAsInt("ImmArgIdx");
+      TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
       Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
 
-      assert((ArgIdx >= 0 && Kind >= 0) && "Arg and Kind must be nonnegative");
+      assert((ArgIdx >= 0 && Kind >= 0) && "ImmArgIdx and Kind must be nonnegative");
 
       if (TypeArgIdx >= 0) {
         EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
@@ -1492,7 +1492,7 @@ Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
     N = emitDagArg(DI->getArg(0), "").second;
   std::optional<std::string> MangledName;
   if (MatchMangledName) {
-    if (Intr.getRecord()->getValueAsBit("isLaneQ"))
+    if(Intr.getRecord()->getValueAsString("Name").ends_with("laneq"))
       N += "q";
     MangledName = Intr.mangleName(N, ClassS);
   }
@@ -1974,7 +1974,6 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
-
   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
 
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 3a667e10bd8d72..2a34188a807eb0 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -372,7 +372,7 @@ class SVEEmitter {
   /// Emit all the range checks for the immediates.
   void createRangeChecks(raw_ostream &o);
 
-  // Emit all the ImmCheckTypes to arm_immcheck_types.h
+  // Emit all the ImmCheckTypes to arm_immcheck_types.inc
   void createImmCheckTypes(raw_ostream &OS);
 
   /// Create the SVETypeFlags used in CGBuiltins
@@ -1196,18 +1196,18 @@ void SVEEmitter::createIntrinsic(
     // Collate a list of range/option checks for the immediates.
     SmallVector<ImmCheck, 2> ImmChecks;
     for (auto *R : ImmCheckList) {
-      int64_t Arg = R->getValueAsInt("Arg");
-      int64_t EltSizeArg = R->getValueAsInt("TypeContextArg");
+      int64_t ArgIdx = R->getValueAsInt("ImmArgIdx");
+      int64_t EltSizeArgIdx = R->getValueAsInt("TypeContextArgIdx");
       int64_t Kind = R->getValueAsDef("Kind")->getValueAsInt("Value");
-      assert(Arg >= 0 && Kind >= 0 && "Arg and Kind must be nonnegative");
+      assert(ArgIdx >= 0 && Kind >= 0 && "ImmArgIdx and Kind must be nonnegative");
 
       unsigned ElementSizeInBits = 0;
       char Mod;
       unsigned NumVectors;
-      std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArg + 1);
-      if (EltSizeArg >= 0)
+      std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArgIdx + 1);
+      if (EltSizeArgIdx >= 0)
         ElementSizeInBits = SVEType(TS, Mod, NumVectors).getElementSizeInBits();
-      ImmChecks.push_back(ImmCheck(Arg, Kind, ElementSizeInBits));
+      ImmChecks.push_back(ImmCheck(ArgIdx, Kind, ElementSizeInBits));
     }
 
     Out.push_back(std::make_unique<Intrinsic>(

>From 07fb269c6373be7bb5d79311a71ca2a909e625e2 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 10:49:41 +0000
Subject: [PATCH 08/13] Update and move vcmla immediate range tests

	- update tests for vcmla{_ROT}_laneq_f16 after immediate range resolution
	- move test file to clang/test/Sema/aarch64-neon-immediate-ranges
	- remove trailing whitespace from test lines
---
 .../vcmla.c}                                  | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)
 rename clang/test/Sema/{aarch64-neon-vcmla-ranges.c => aarch64-neon-immediate-ranges/vcmla.c} (93%)

diff --git a/clang/test/Sema/aarch64-neon-vcmla-ranges.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
similarity index 93%
rename from clang/test/Sema/aarch64-neon-vcmla-ranges.c
rename to clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
index 9b42e68670da08..21c24975b38b37 100644
--- a/clang/test/Sema/aarch64-neon-vcmla-ranges.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
@@ -15,17 +15,18 @@ void test_vcmla_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 void test_vcmla_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_laneq_f16(a, b, c, 0);
   vcmla_laneq_f16(a, b, c, 1);
+  vcmla_laneq_f16(a, b, c, 3);
 
-  vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
-  vcmla_laneq_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c){
   vcmlaq_lane_f16(a, b, c, 0);
   vcmlaq_lane_f16(a, b, c, 1);
 
-  vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
-  vcmlaq_lane_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_lane_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
@@ -33,8 +34,8 @@ void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
   vcmlaq_laneq_f16(a, b, c, 1);
   vcmlaq_laneq_f16(a, b, c, 3);
 
-  vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
-  vcmlaq_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
@@ -70,10 +71,10 @@ void test_vcmla_rot90_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 
 void test_vcmla_rot90_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_rot90_laneq_f16(a, b, c, 0);
-  vcmla_rot90_laneq_f16(a, b, c, 1);
+  vcmla_rot90_laneq_f16(a, b, c, 3);
 
   vcmla_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
-  vcmla_rot90_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_rot90_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
@@ -94,10 +95,10 @@ void test_vcmla_rot180_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 
 void test_vcmla_rot180_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_rot180_laneq_f16(a, b, c, 0);
-  vcmla_rot180_laneq_f16(a, b, c, 1);
+  vcmla_rot180_laneq_f16(a, b, c, 3);
 
   vcmla_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
-  vcmla_rot180_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_rot180_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
@@ -118,10 +119,10 @@ void test_vcmla_rot270_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 
 void test_vcmla_rot270_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_rot270_laneq_f16(a, b, c, 0);
-  vcmla_rot270_laneq_f16(a, b, c, 1);
+  vcmla_rot270_laneq_f16(a, b, c, 3);
 
   vcmla_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
-  vcmla_rot270_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_rot270_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){

>From bfdc917f9d5c015ce991a96a631cf4b34668df3b Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 11:31:17 +0000
Subject: [PATCH 09/13] [Fixup] tablegen command descr and SemaArm

---
 clang/include/clang/Sema/SemaARM.h | 1 -
 clang/lib/Sema/SemaARM.cpp         | 3 ++-
 clang/utils/TableGen/TableGen.cpp  | 7 ++++---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index c7f4abb822e2f3..33a2fa1c91776e 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -15,7 +15,6 @@
 
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/Expr.h"
-#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/SemaBase.h"
 #include "llvm/ADT/SmallVector.h"
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index f6e00f957a51f4..7e63833c6129c3 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -12,6 +12,7 @@
 
 #include "clang/Sema/SemaARM.h"
 #include "clang/Basic/DiagnosticSema.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
@@ -446,7 +447,7 @@ bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
     break;
   case ImmCheckType::ImmCheckLaneIndex:
     if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
-                                        (VecBitWidth / (1 * EltBitWidth)) - 1))
+                                        (VecBitWidth / EltBitWidth) - 1))
       return true;
     break;
   case ImmCheckType::ImmCheckLaneIndexCompRotate:
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index dab447ff7d944f..84afd4c0afb269 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -235,9 +235,10 @@ cl::opt<ActionType> Action(
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
                    "Generate ARM NEON tests for clang"),
-        clEnumValN(GenArmImmCheckTypes, "gen-arm-immcheck-types",
-                   "Generate arm_immchecktypes.h (immediate range check types)"
-                   " for clang"),
+        clEnumValN(
+            GenArmImmCheckTypes, "gen-arm-immcheck-types",
+            "Generate arm_immcheck_types.inc (immediate range check types)"
+            " for clang"),
         clEnumValN(GenArmSveHeader, "gen-arm-sve-header",
                    "Generate arm_sve.h for clang"),
         clEnumValN(GenArmSveBuiltins, "gen-arm-sve-builtins",

>From b9b3835be189a45634c87535c2bdeed240e55e9b Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 18:20:42 +0000
Subject: [PATCH 10/13] Add check to ensure builtin range is not redefined

- Changes
	- llvm/include/llvm/TableGen/AArch64ImmCheck.h
		- Add TypeArgIdx field for ImmCheck (third argument in tablegen)
		- introduce equality operator for ImmCheck
		- rename 'Arg' to 'ImmArgIdx' for consistency with the tablegen class.
	- clang/utils/TableGen/NeonEmitter.cpp
		- Add assertion to ensure that a neon builtin's range is not
		  redefined. This problem could arise when two intrinsics share the same builtin
		  but define different immediate checks in tablegen (range checking is done via
		  the builtin rather than the intrinsic).
	- clang/include/clang/Basic/arm_neon.td
		- ensure consistent TypeContextArg is used for vsri/vsli intrinsics.
---
 clang/include/clang/Basic/arm_neon.td        |  8 ++--
 clang/utils/TableGen/NeonEmitter.cpp         | 39 +++++++++++++-------
 clang/utils/TableGen/SveEmitter.cpp          | 10 +++--
 llvm/include/llvm/TableGen/AArch64ImmCheck.h | 18 ++++++---
 4 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 89964ad0f4bc39..1535b20347f286 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -449,10 +449,10 @@ def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi",
 // E.3.13 Shifts with insert
 def VSRI_N : WInst<"vsri_n", "...I",
                    "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
-                    [ImmCheck<2, ImmCheckShiftRight>]>;
+                    [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 def VSLI_N : WInst<"vsli_n", "...I",
                    "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
-                   [ImmCheck<2, ImmCheckShiftLeft>]>;
+                   [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -963,8 +963,8 @@ def SHLL_HIGH_N    : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
                              OP_LONG_HI>;
 
 ////////////////////////////////////////////////////////////////////////////////
-def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 1>]>;
-def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 1>]>;
+def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 0>]>;
+def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 // Right shift narrow high
 def SHRN_HIGH_N    : IOpInst<"vshrn_high_n", "<(<q).I",
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index eee5b95a601ab0..df9875e8784d7e 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -425,8 +425,13 @@ class Intrinsic {
       }
 
       ImmChecks.emplace_back(
-          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits));
+          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits, TypeArgIdx));
     }
+    llvm::sort(ImmChecks.begin(), ImmChecks.end(),
+               [](const ImmCheck &a, const ImmCheck &b) {
+                 return a.getImmArgIdx() < b.getImmArgIdx();
+               }); // Sort for comparison with other intrinsics which map to the
+                   // same builtin
   }
 
   /// Get the Record that this intrinsic is based off.
@@ -2166,27 +2171,33 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
 
 void NeonEmitter::genIntrinsicRangeCheckCode(
     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
-  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
-  // Ensure these are only emitted once.
-  std::set<std::string> Emitted;
+  std::map<std::string, ArrayRef<ImmCheck>> Emitted;
 
+  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   for (auto &Def : Defs) {
-    if (Emitted.find(Def->getMangledName()) != Emitted.end() ||
-        !Def->hasImmediate())
-      continue;
-
     // If the Def has a body (operation DAGs), it is not a __builtin_neon_
-    if (Def->hasBody())
+    if (Def->hasBody() || !Def->hasImmediate())
       continue;
 
+    // Sorted by immediate argument index
+    ArrayRef<ImmCheck> Checks = Def->getImmChecks();
+
+    const auto it = Emitted.find(Def->getMangledName());
+    if (it != Emitted.end()) {
+      assert(it->second.equals(Checks) &&
+             "Neon builtin's immediate range checks cannot be redefined.");
+      continue; // Ensure this is emitted only once
+    }
+
+    // Emit builtin's range checks
     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
-    for (const auto &Check : Def->getImmChecks()) {
-      OS << " ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
-         << Check.getVecSizeInBits() << "));\n"
+    for (const auto &Check : Checks) {
+      OS << " ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
+         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
+         << ", " << Check.getVecSizeInBits() << "));\n"
          << " break;\n";
     }
-    Emitted.insert(Def->getMangledName());
+    Emitted[Def->getMangledName()] = Checks;
   }
 
   OS << "#endif\n\n";
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 2a34188a807eb0..370b04464f0d09 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1527,8 +1527,9 @@ void SVEEmitter::createRangeChecks(raw_ostream &OS) {
 
     OS << "case SVE::BI__builtin_sve_" << Def->getMangledName() << ":\n";
     for (auto &Check : Def->getImmChecks())
-      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
+         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
+         << "));\n";
     OS << "  break;\n";
 
     Emitted.insert(Def->getMangledName());
@@ -1722,8 +1723,9 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
 
     OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n";
     for (auto &Check : Def->getImmChecks())
-      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
+         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
+         << "));\n";
     OS << "  break;\n";
 
     Emitted.insert(Def->getMangledName());
diff --git a/llvm/include/llvm/TableGen/AArch64ImmCheck.h b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
index ff8c4a1865ea34..16d88142d038ad 100644
--- a/llvm/include/llvm/TableGen/AArch64ImmCheck.h
+++ b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
@@ -15,20 +15,26 @@
 #define AARCH64_IMMCHECK_H
 
 class ImmCheck {
-  unsigned Arg;
+  int ImmArgIdx;
   unsigned Kind;
   unsigned ElementSizeInBits;
   unsigned VecSizeInBits;
+  int TypeArgIdx;
 
 public:
-  ImmCheck(unsigned Arg, unsigned Kind, unsigned ElementSizeInBits = 0,
-           unsigned VecSizeInBits = 128)
-      : Arg(Arg), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
-        VecSizeInBits(VecSizeInBits) {}
+  ImmCheck(int ImmArgIdx, unsigned Kind, unsigned ElementSizeInBits = 0,
+           unsigned VecSizeInBits = 128, int TypeArgIdx = -1)
+      : ImmArgIdx(ImmArgIdx), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
+        VecSizeInBits(VecSizeInBits), TypeArgIdx(TypeArgIdx) {}
   ImmCheck(const ImmCheck &Other) = default;
   ~ImmCheck() = default;
 
-  unsigned getArg() const { return Arg; }
+  bool operator==(const ImmCheck &other) const {
+    return other.getImmArgIdx() == ImmArgIdx && other.getKind() == Kind &&
+           other.getTypeArgIdx() == TypeArgIdx;
+  }
+  int getImmArgIdx() const { return ImmArgIdx; }
+  int getTypeArgIdx() const { return TypeArgIdx; }
   unsigned getKind() const { return Kind; }
   unsigned getElementSizeInBits() const { return ElementSizeInBits; }
   unsigned getVecSizeInBits() const { return VecSizeInBits; }

>From 330c1c671398adf20d0375d922732b17870f1709 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 22 Aug 2024 08:51:18 +0000
Subject: [PATCH 11/13] [NFC] fix format

---
 clang/utils/TableGen/NeonEmitter.cpp | 4 ++--
 clang/utils/TableGen/SveEmitter.cpp  | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index df9875e8784d7e..c66a6c2f4f1dd0 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -416,8 +416,8 @@ class Intrinsic {
       ArgIdx = I->getValueAsInt("ImmArgIdx");
       TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
       Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
-
-      assert((ArgIdx >= 0 && Kind >= 0) && "ImmArgIdx and Kind must be nonnegative");
+      assert((ArgIdx >= 0 && Kind >= 0) &&
+             "ImmArgIdx and Kind must be nonnegative");
 
       if (TypeArgIdx >= 0) {
         EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 370b04464f0d09..e1ff6684c5b257 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1199,7 +1199,8 @@ void SVEEmitter::createIntrinsic(
       int64_t ArgIdx = R->getValueAsInt("ImmArgIdx");
       int64_t EltSizeArgIdx = R->getValueAsInt("TypeContextArgIdx");
       int64_t Kind = R->getValueAsDef("Kind")->getValueAsInt("Value");
-      assert(ArgIdx >= 0 && Kind >= 0 && "ImmArgIdx and Kind must be nonnegative");
+      assert(ArgIdx >= 0 && Kind >= 0 &&
+             "ImmArgIdx and Kind must be nonnegative");
 
       unsigned ElementSizeInBits = 0;
       char Mod;

>From 110c690802690707d567604cbe8acfd9d95d9605 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 22 Aug 2024 16:28:09 +0000
Subject: [PATCH 12/13] Fix builtin range-check compatibility

---
 clang/utils/TableGen/NeonEmitter.cpp         | 46 +++++++++++++++++---
 llvm/include/llvm/TableGen/AArch64ImmCheck.h | 10 +----
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index c66a6c2f4f1dd0..2c8fdda9b3dc1f 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -48,6 +48,7 @@
 #include <set>
 #include <sstream>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -416,16 +417,23 @@ class Intrinsic {
       ArgIdx = I->getValueAsInt("ImmArgIdx");
       TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
       Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
+
       assert((ArgIdx >= 0 && Kind >= 0) &&
              "ImmArgIdx and Kind must be nonnegative");
 
       if (TypeArgIdx >= 0) {
-        EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
-        VecSizeInBits = getParamType(TypeArgIdx).getSizeInBits();
+        Type ContextType = getParamType(TypeArgIdx);
+
+        // Element size cannot be set for intrinscs that map to polymorphic
+        // builtins.
+        if (CK != ClassB)
+          EltSizeInBits = ContextType.getElementSizeInBits();
+
+        VecSizeInBits = ContextType.getSizeInBits();
       }
 
       ImmChecks.emplace_back(
-          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits, TypeArgIdx));
+          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits));
     }
     llvm::sort(ImmChecks.begin(), ImmChecks.end(),
                [](const ImmCheck &a, const ImmCheck &b) {
@@ -581,6 +589,8 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
+  bool areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+                                const ArrayRef<ImmCheck> ChecksB);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
 
@@ -1497,7 +1507,7 @@ Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
     N = emitDagArg(DI->getArg(0), "").second;
   std::optional<std::string> MangledName;
   if (MatchMangledName) {
-    if(Intr.getRecord()->getValueAsString("Name").ends_with("laneq"))
+    if (Intr.getRecord()->getValueAsString("Name").ends_with("laneq"))
       N += "q";
     MangledName = Intr.mangleName(N, ClassS);
   }
@@ -2169,9 +2179,30 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
+bool NeonEmitter::areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+                                           const ArrayRef<ImmCheck> ChecksB) {
+  // If multiple intrinsics map to the same builtin, we must ensure that the
+  // intended range checks performed in SemaArm.cpp do not contradict eachother,
+  // as these are emitted once per-buitlin.
+  //
+  // The arguments to be checked and type of each check to be performed must be
+  // the same. The element types may differ as they will be resolved
+  // per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes
+  // are not and so must be the same.
+  bool compat =
+      std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(),
+                 [](const auto A, const auto B) {
+                   return A.getImmArgIdx() == B.getImmArgIdx() &&
+                          A.getKind() == B.getKind() &&
+                          A.getVecSizeInBits() == B.getVecSizeInBits();
+                 });
+
+  return compat;
+}
+
 void NeonEmitter::genIntrinsicRangeCheckCode(
     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
-  std::map<std::string, ArrayRef<ImmCheck>> Emitted;
+  std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
 
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   for (auto &Def : Defs) {
@@ -2184,8 +2215,9 @@ void NeonEmitter::genIntrinsicRangeCheckCode(
 
     const auto it = Emitted.find(Def->getMangledName());
     if (it != Emitted.end()) {
-      assert(it->second.equals(Checks) &&
-             "Neon builtin's immediate range checks cannot be redefined.");
+      assert(areCompatableRangeChecks(Checks, it->second) &&
+             "Neon intrinsics with incompatable immediate range checks cannot "
+             "share a builtin.");
       continue; // Ensure this is emitted only once
     }
 
diff --git a/llvm/include/llvm/TableGen/AArch64ImmCheck.h b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
index 16d88142d038ad..0242620c9c60ea 100644
--- a/llvm/include/llvm/TableGen/AArch64ImmCheck.h
+++ b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
@@ -19,22 +19,16 @@ class ImmCheck {
   unsigned Kind;
   unsigned ElementSizeInBits;
   unsigned VecSizeInBits;
-  int TypeArgIdx;
 
 public:
   ImmCheck(int ImmArgIdx, unsigned Kind, unsigned ElementSizeInBits = 0,
-           unsigned VecSizeInBits = 128, int TypeArgIdx = -1)
+           unsigned VecSizeInBits = 128)
       : ImmArgIdx(ImmArgIdx), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
-        VecSizeInBits(VecSizeInBits), TypeArgIdx(TypeArgIdx) {}
+        VecSizeInBits(VecSizeInBits) {}
   ImmCheck(const ImmCheck &Other) = default;
   ~ImmCheck() = default;
 
-  bool operator==(const ImmCheck &other) const {
-    return other.getImmArgIdx() == ImmArgIdx && other.getKind() == Kind &&
-           other.getTypeArgIdx() == TypeArgIdx;
-  }
   int getImmArgIdx() const { return ImmArgIdx; }
-  int getTypeArgIdx() const { return TypeArgIdx; }
   unsigned getKind() const { return Kind; }
   unsigned getElementSizeInBits() const { return ElementSizeInBits; }
   unsigned getVecSizeInBits() const { return VecSizeInBits; }

>From ba7554d8ab24ebebfd940b3d2f8cea8369fc2d61 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 28 Aug 2024 11:14:40 +0000
Subject: [PATCH 13/13] [Fixup] Improve comments and remove trailing whitespace

---
 clang/include/clang/Basic/TargetBuiltins.h    |   2 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |   6 +-
 clang/include/clang/Basic/arm_neon.td         | 160 +++++++++---------
 clang/utils/TableGen/NeonEmitter.cpp          |   6 +-
 4 files changed, 88 insertions(+), 86 deletions(-)

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index a85e070cad0383..02b4a4b39bbf43 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -215,7 +215,7 @@ namespace clang {
       return ET == Poly8 || ET == Poly16 || ET == Poly64;
     }
     bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
-    bool isQuad() const { return (Flags & QuadFlag) != 0; };
+    bool isQuad() const { return (Flags & QuadFlag) != 0; }
     unsigned getEltSizeInBits() const {
       switch (getEltType()) {
       case Int8:
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
index c6a7ca26855496..9d7f74a35aaa87 100644
--- a/clang/include/clang/Basic/arm_immcheck_incl.td
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -32,8 +32,12 @@ def ImmCheck1_64                : ImmCheckType<25>; // 1..64
 def ImmCheck0_63                : ImmCheckType<26>; // 0..63
 
 class ImmCheck<int immArgIdx, ImmCheckType kind, int typeArgIdx = -1> {
+  // Parameter index of immediate argument to be verified
   int ImmArgIdx = immArgIdx;
-  // The index of the argument whose type should be referred to when validating this immedaite.
+
+  // Parameter index of argument whose type determines the context of this immediate check -
+  // element type for SVE/SME, element type and vector size for NEON (ignoring element type for
+  // ClassB NEON intrinsics).
   int TypeContextArgIdx = typeArgIdx;
   ImmCheckType Kind = kind;
 }
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 1535b20347f286..1349dbc4a7557b 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -284,10 +284,10 @@ def OP_CVT_F32_BF16
 
 // Splat operation - performs a range-checked splat over a vector
 def SPLAT  : WInst<"splat_lane", ".(!q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
                     [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
                    [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 let TargetGuard = "bf16,neon" in {
@@ -404,19 +404,19 @@ def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
 let isShift = 1 in {
 
 
-def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
-def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<2, ImmCheckShiftRight>]>;
-def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<2, ImmCheckShiftRight>]>;
-def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
-def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl", 
+def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 
 // Narrowing right shifts should have an immediate range of 1..(sizeinbits(arg)/2).
@@ -442,16 +442,16 @@ def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl",
 // base type, so the range here is actually 0..(sizeinbits(arg)*2).
 // This cannot be rectified currently due to a use of vshll_n_s16 with an
 // out-of-bounds immediate in the defintiion of vcvt_f32_bf16.
-def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi", 
+def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.13 Shifts with insert
 def VSRI_N : WInst<"vsri_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
                     [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 def VSLI_N : WInst<"vsli_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
                    [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 }
 
@@ -552,14 +552,14 @@ def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh",
 // E.3.16 Extract lanes from a vector
 let InstName = "vmov" in
 def VGET_LANE : IInst<"vget_lane", "1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.17 Set lanes within a vector
 let InstName = "vmov" in
 def VSET_LANE : IInst<"vset_lane", ".1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
                       [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -609,9 +609,9 @@ let ArchGuard = "(__ARM_FP & 2)" in {
 def VCVT_S32     : SInst<"vcvt_s32", "S.",  "fQf">;
 def VCVT_U32     : SInst<"vcvt_u32", "U.",  "fQf">;
 def VCVT_F32     : SInst<"vcvt_f32", "F(.!)",  "iUiQiQUi">;
-def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf", 
+def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf",
                         [ImmCheck<1, ImmCheck1_32>]>;
-def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf", 
+def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf",
                         [ImmCheck<1, ImmCheck1_32>]>;
 def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi", 
                         [ImmCheck<1, ImmCheck1_32>]>;
@@ -682,7 +682,7 @@ def VQDMLSL_N     : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.26 Vector Extract
 def VEXT : WInst<"vext", "...I",
-                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf", 
+                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf",
                  [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -792,21 +792,21 @@ def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
 def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
 def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
 
-def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl", 
+def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl",
                     [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
-def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
-def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl", 
+def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl",
                     [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
 def LD1_DUP  : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">;
@@ -987,9 +987,9 @@ def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
 def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl", 
                         [ImmCheck<1, ImmCheck1_64>]>;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd", 
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd",
                         [ImmCheck<1, ImmCheck1_64>]>;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd", 
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd",
                         [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1028,9 +1028,9 @@ let TargetGuard = "aes,neon" in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
-def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl", 
+def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl",
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl", 
+def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl",
                       [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
                         "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
@@ -1110,9 +1110,9 @@ def VQDMULL_HIGH_LANE   : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si",
                                   OP_QDMULLHi_LN>;
 def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
                                   OP_QDMULLHi_LN>;
-def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
+def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
+def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
@@ -1343,67 +1343,67 @@ def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
 // Scalar Shift (Immediate)
 let isScalarShift = 1 in {
 // Signed/Unsigned Shift Right (Immediate)
-def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl", 
+def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl",
                         [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right (Immediate)
-def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl", 
+def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl",
                           [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 
 // Signed/Unsigned Shift Right and Accumulate (Immediate)
-def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl", 
+def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
-def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl", 
+def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 
 // Shift Left (Immediate)
-def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl", 
+def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl",
                       [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed/Unsigned Saturating Shift Left (Immediate)
-def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl", 
+def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl",
                       [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed Saturating Shift Left Unsigned (Immediate)
-def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl", 
+def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl",
                       [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 
 // Shift Right And Insert (Immediate)
-def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl", 
+def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Shift Left And Insert (Immediate)
-def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl", 
+def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 let isScalarNarrowShift = 1 in {
   // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
-  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
-  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl", 
+  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl", 
+  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate)
-def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi", 
+def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi",
                               [ImmCheck<1, ImmCheck1_32>]>;
-def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl", 
+def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl",
                               [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate)
-def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf", 
+def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf",
                                 [ImmCheck<1, ImmCheck1_32>]>;
-def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf", 
+def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf",
                                 [ImmCheck<1, ImmCheck1_32>]>;
-def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd", 
+def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd",
                                 [ImmCheck<1, ImmCheck1_64>]>;
-def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd", 
+def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd",
                                 [ImmCheck<1, ImmCheck1_64>]>;
 }
 
@@ -1606,11 +1606,11 @@ def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_L
 def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
-def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d", 
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d",
                             [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
-def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d",
                               [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 // VMULX_LANE d type implemented using scalar vmulx_lane
 def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
@@ -1618,9 +1618,9 @@ def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
 // VMULX_LANEQ d type implemented using scalar vmulx_laneq
 def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>;
 // Scalar Floating Point fused multiply-add (scalar, by element)
-def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd",
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd",
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Scalar Floating Point fused multiply-subtract (scalar, by element)
@@ -1632,15 +1632,15 @@ def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_Q
 def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>;
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
-def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi", 
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi",
                                 [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi",
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
-def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi",
                               [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi",
                               [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 // Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
 def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>;
@@ -1659,9 +1659,9 @@ def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_Q
 def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
 } // TargetGuard = "v8.1a"
 
-def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
@@ -1815,17 +1815,17 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   // ARMv8.2-A FP16 lane vector intrinsics.
 
   // FMA lane
-  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
+  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh",
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
+  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh",
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMA lane with scalar argument
   def FMLA_NH      : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
   // Scalar floating point fused multiply-add (scalar, by element)
-  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
+  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh",
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
+  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh",
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMS lane
@@ -1847,9 +1847,9 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>;
   def VMULX_NH      : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
   // Scalar floating point  mulx (scalar, by element)
-  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
+  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh",
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
+  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh",
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   // ARMv8.2-A FP16 reduction vector intrinsics.
   def VMAXVH   : SInst<"vmaxv", "1.", "hQh">;
@@ -1940,18 +1940,16 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
     // vcmla{ROT}_lane
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed lanety , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
-
     // vcmlaq{ROT}_lane
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
 
-    // vcmla_{ROT}_laneq
+    // vcmla{ROT}_laneq
     def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
             (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
     // vcmlaq{ROT}_laneq
     def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
             (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-
   }
 }
 
@@ -1995,13 +1993,13 @@ let TargetGuard = "bf16,neon" in {
   def VGET_HIGH_BF : NoTestOpInst<"vget_high", ".Q", "b", OP_HI>;
   def VGET_LOW_BF  : NoTestOpInst<"vget_low", ".Q", "b", OP_LO>;
 
-  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb", 
+  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb",
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb", 
+  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
+  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb",
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
+  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb",
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
   def VLD1_BF : WInst<"vld1", ".(c*!)", "bQb">;
@@ -2022,21 +2020,21 @@ let TargetGuard = "bf16,neon" in {
   def VST1_X3_BF : WInst<"vst1_x3", "v*(3!)", "bQb">;
   def VST1_X4_BF : WInst<"vst1_x4", "v*(4!)", "bQb">;
 
-  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb", 
+  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb", 
+  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb",
                           [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb", 
+  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb",
                           [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
-  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb", 
+  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb",
                           [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
-  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb", 
+  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb", 
+  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb",
                           [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
   def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb",
                           [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb", 
+  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb",
                           [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
   def VLD1_DUP_BF : WInst<"vld1_dup", ".(c*!)", "bQb">;
@@ -2089,6 +2087,6 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3,neon" in {
   def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl",
                         [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl", 
+  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl",
                         [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 2c8fdda9b3dc1f..68aa9d8a0890a9 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -462,7 +462,6 @@ class Intrinsic {
 
   // Return if the supplied argument is an immediate
   bool isArgImmediate(unsigned idx) const {
-    assert((idx + 1) < Types.size() && "Argument type index out of range!");
     return Types[idx + 1].isImmediate();
   }
 
@@ -2179,8 +2178,9 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
-bool NeonEmitter::areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
-                                           const ArrayRef<ImmCheck> ChecksB) {
+inline bool
+NeonEmitter::areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+                                      const ArrayRef<ImmCheck> ChecksB) {
   // If multiple intrinsics map to the same builtin, we must ensure that the
   // intended range checks performed in SemaArm.cpp do not contradict eachother,
   // as these are emitted once per-buitlin.