[clang] [llvm] [Clang][AArch64] Add customisable immediate range checking to NEON (PR #100278)

Fri Sep 6 03:55:10 PDT 2024

https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/100278

>From cb3d0b15a711459b4708a79a10fc764dc6415f84 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 23 Jul 2024 08:38:32 +0000
Subject: [PATCH 01/17] Rebase to resolve arm_neon.td conflict

---
 clang/include/clang/Basic/TargetBuiltins.h    |  39 +-
 clang/include/clang/Basic/arm_fp16.td         |   2 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |  39 ++
 clang/include/clang/Basic/arm_neon.td         | 374 ++++++++++++------
 clang/include/clang/Basic/arm_neon_incl.td    |  11 +-
 clang/include/clang/Basic/arm_sve_sme_incl.td |  36 +-
 clang/include/clang/Sema/SemaARM.h            |   3 +
 clang/lib/Sema/SemaARM.cpp                    | 151 ++++---
 clang/test/CodeGen/aarch64-neon-vcmla.c       |  60 ++-
 clang/test/Sema/aarch64-neon-vcmla-ranges.c   | 202 ++++++++++
 clang/utils/TableGen/NeonEmitter.cpp          | 133 +++----
 clang/utils/TableGen/SveEmitter.cpp           |   2 +-
 12 files changed, 700 insertions(+), 352 deletions(-)
 create mode 100644 clang/include/clang/Basic/arm_immcheck_incl.td
 create mode 100644 clang/test/Sema/aarch64-neon-vcmla-ranges.c

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 4333830bf34f24..50e17ad7e16284 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -209,15 +209,45 @@ namespace clang {
         Flags |= QuadFlag;
     }
 
-    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
+    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); } 
     bool isPoly() const {
       EltType ET = getEltType();
       return ET == Poly8 || ET == Poly16 || ET == Poly64;
     }
     bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
-    bool isQuad() const { return (Flags & QuadFlag) != 0; }
+    bool isQuad() const { return (Flags & QuadFlag) != 0; };
+    unsigned getEltSizeInBits() const {
+      switch(getEltType()){
+        case Int8:
+        case Poly8:
+          return 8;
+        case Int16:
+        case Float16:
+        case Poly16:
+        case BFloat16:
+          return 16;
+        case Int32:
+        case Float32:
+          return 32;
+        case Int64:
+        case Float64:
+        case Poly64:
+          return 64;
+        case Poly128:
+          return 128;
+        default:
+          llvm_unreachable("Invalid NeonTypeFlag!");
+      }
+    }
   };
 
+    // Shared between SVE/SME and NEON
+    enum ArmImmCheckType {
+#define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+#include "clang/Basic/arm_sve_typeflags.inc"
+#undef  LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+    };
+
   /// Flags to identify the types for overloaded SVE builtins.
   class SVETypeFlags {
     uint64_t Flags;
@@ -249,11 +279,6 @@ namespace clang {
 #undef LLVM_GET_SVE_MERGETYPES
     };
 
-    enum ImmCheckType {
-#define LLVM_GET_SVE_IMMCHECKTYPES
-#include "clang/Basic/arm_sve_typeflags.inc"
-#undef LLVM_GET_SVE_IMMCHECKTYPES
-    };
 
     SVETypeFlags(uint64_t F) : Flags(F) {
       EltTypeShift = llvm::countr_zero(EltTypeMask);
diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index d36b4617bef5d2..42228a3ba1ffad 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -76,7 +76,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "(1U)1", "Sh">;
   def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
-  let isVCVT_N = 1 in {
+  let isVCVT_N = 1, ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
     def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
     def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
     def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
new file mode 100644
index 00000000000000..3b20248f650400
--- /dev/null
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -0,0 +1,39 @@
+class ImmCheckType<int val> {
+  int Value = val;
+}
+
+// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
+def ImmCheck0_31                : ImmCheckType<0>;  // 0..31 (used for e.g. predicate patterns)
+def ImmCheck1_16                : ImmCheckType<1>;  // 1..16
+def ImmCheckExtract             : ImmCheckType<2>;  // 0..(2048/sizeinbits(elt) - 1)
+def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
+def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
+def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
+def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
+def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneQIndex          : ImmCheckType<8>;  // (Neon) treat type as Quad
+def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(128/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270    : ImmCheckType<11>; // 90, 270
+def ImmCheckComplexRotAll90     : ImmCheckType<12>; // 0, 90, 180, 270
+def ImmCheck0_13                : ImmCheckType<13>; // 0..13
+def ImmCheck0_1                 : ImmCheckType<14>; // 0..1
+def ImmCheck0_2                 : ImmCheckType<15>; // 0..2
+def ImmCheck0_3                 : ImmCheckType<16>; // 0..3
+def ImmCheck0_0                 : ImmCheckType<17>; // 0..0
+def ImmCheck0_15                : ImmCheckType<18>; // 0..15
+def ImmCheck0_255               : ImmCheckType<19>; // 0..255
+def ImmCheck2_4_Mul2            : ImmCheckType<20>; // 2, 4
+def ImmCheck1_1                 : ImmCheckType<21>; // 1..1
+def ImmCheck1_3                 : ImmCheckType<22>; // 1..3
+def ImmCheck1_7                 : ImmCheckType<23>; // 1..7
+def ImmCheck1_32                : ImmCheckType<24>; // 1..32
+def ImmCheck1_64                : ImmCheckType<25>; // 1..64
+def ImmCheck0_63                : ImmCheckType<26>; // 0..63
+
+class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
+  int Arg = arg;
+  // The index of the argument whose type should be referred to when validating this immediate.
+  int EltSizeArg = eltSizeArg;
+  ImmCheckType Kind = kind;
+}
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 536c0652280b9d..f92e405b1d8691 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -284,14 +284,18 @@ def OP_CVT_F32_BF16
 
 // Splat operation - performs a range-checked splat over a vector
 def SPLAT  : WInst<"splat_lane", ".(!q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl">;
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                    [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                   [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
   let isLaneQ = 1;
 }
 let TargetGuard = "bf16,neon" in {
-  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb">;
-  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb"> {
+  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb", 
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", 
+                      [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
     let isLaneQ = 1;
   }
 }
@@ -401,27 +405,45 @@ def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.12 Shifts by constant
 let isShift = 1 in {
-def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">;
-def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl">;
-def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil">;
-def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil">;
-def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl">;
-def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl">;
-def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl">;
-def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">;
+
+
+def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<2, ImmCheckShiftRight>]>;
+def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<2, ImmCheckShiftRight>]>;
+def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl", 
+                      [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi", 
+                      [ImmCheck<1, ImmCheckShiftLeft>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.13 Shifts with insert
 def VSRI_N : WInst<"vsri_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                    [ImmCheck<2, ImmCheckShiftRight>]>;
 def VSLI_N : WInst<"vsli_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                   [ImmCheck<2, ImmCheckShiftLeft>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -435,7 +457,8 @@ def VLD1_X3   : WInst<"vld1_x3", "3(c*!)",
 def VLD1_X4   : WInst<"vld1_x4", "4(c*!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP  : WInst<"vld1_dup", ".(c*!)",
                       "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST1      : WInst<"vst1", "v*(.!)",
@@ -447,19 +470,23 @@ def VST1_X3   : WInst<"vst1_x3", "v*(3!)",
 def VST1_X4   : WInst<"vst1_x4", "v*(4!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VST1_LANE : WInst<"vst1_lane", "v*(.!)I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+
 let ArchGuard = "(__ARM_FP & 2)" in {
 def VLD1_F16      : WInst<"vld1", ".(c*!)", "hQh">;
 def VLD1_X2_F16   : WInst<"vld1_x2", "2(c*!)", "hQh">;
 def VLD1_X3_F16   : WInst<"vld1_x3", "3(c*!)", "hQh">;
 def VLD1_X4_F16   : WInst<"vld1_x4", "4(c*!)", "hQh">;
-def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">;
+def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh", 
+                          [ImmCheck<2, ImmCheck0_3, 1>]>;
 def VLD1_DUP_F16  : WInst<"vld1_dup", ".(c*!)", "hQh">;
 def VST1_F16      : WInst<"vst1", "v*(.!)", "hQh">;
 def VST1_X2_F16   : WInst<"vst1_x2", "v*(2!)", "hQh">;
 def VST1_X3_F16   : WInst<"vst1_x3", "v*(3!)", "hQh">;
 def VST1_X4_F16   : WInst<"vst1_x4", "v*(4!)", "hQh">;
-def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">;
+def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh", 
+                          [ImmCheck<2, ImmCheck0_3, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -473,15 +500,21 @@ def VLD3_DUP  : WInst<"vld3_dup", "3(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
 def VLD4_DUP  : WInst<"vld4_dup", "4(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
-def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
-def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+                      [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 let ArchGuard = "(__ARM_FP & 2)" in {
 def VLD2_F16      : WInst<"vld2", "2(c*!)", "hQh">;
 def VLD3_F16      : WInst<"vld3", "3(c*!)", "hQh">;
@@ -489,28 +522,36 @@ def VLD4_F16      : WInst<"vld4", "4(c*!)", "hQh">;
 def VLD2_DUP_F16  : WInst<"vld2_dup", "2(c*!)", "hQh">;
 def VLD3_DUP_F16  : WInst<"vld3_dup", "3(c*!)", "hQh">;
 def VLD4_DUP_F16  : WInst<"vld4_dup", "4(c*!)", "hQh">;
-def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">;
-def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">;
-def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">;
+def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh", 
+                          [ImmCheck<4, ImmCheck0_3, 1>]>;
+def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh", 
+                          [ImmCheck<5, ImmCheck0_3, 1>]>;
+def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh", 
+                          [ImmCheck<6, ImmCheck0_3, 1>]>;
 def VST2_F16      : WInst<"vst2", "v*(2!)", "hQh">;
 def VST3_F16      : WInst<"vst3", "v*(3!)", "hQh">;
 def VST4_F16      : WInst<"vst4", "v*(4!)", "hQh">;
-def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">;
-def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">;
-def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">;
+def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh", 
+                          [ImmCheck<3, ImmCheck0_3, 1>]>;
+def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh", 
+                         [ImmCheck<4, ImmCheck0_3, 1>]>;
+def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh", 
+                          [ImmCheck<5, ImmCheck0_3, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.16 Extract lanes from a vector
 let InstName = "vmov" in
 def VGET_LANE : IInst<"vget_lane", "1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.17 Set lanes within a vector
 let InstName = "vmov" in
 def VSET_LANE : IInst<"vset_lane", ".1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.18 Initialize a vector from bit pattern
@@ -560,9 +601,12 @@ def VCVT_S32     : SInst<"vcvt_s32", "S.",  "fQf">;
 def VCVT_U32     : SInst<"vcvt_u32", "U.",  "fQf">;
 def VCVT_F32     : SInst<"vcvt_f32", "F(.!)",  "iUiQiQUi">;
 let isVCVT_N = 1 in {
-def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf">;
-def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf">;
-def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">;
+def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf", 
+                        [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf", 
+                        [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi", 
+                        [ImmCheck<1, ImmCheck1_32>]>;
 }
 
 def VMOVN        : IInst<"vmovn", "<Q",  "silUsUiUl">;
@@ -610,8 +654,10 @@ def VQDMULH_LANE  : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
-def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
+def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi", 
+                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi", 
+                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 }
 
 let TargetGuard = "v8.1a,neon" in {
@@ -629,7 +675,8 @@ def VQDMLSL_N     : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.26 Vector Extract
 def VEXT : WInst<"vext", "...I",
-                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">;
+                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf", 
+                 [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.27 Reverse vector elements
@@ -738,14 +785,22 @@ def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
 def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
 def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
 
-def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">;
-def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">;
-def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl", 
+                    [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl", 
+                    [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+                    [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
 def LD1_DUP  : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">;
 def LD2_DUP  : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">;
@@ -901,8 +956,8 @@ def SHLL_HIGH_N    : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
                              OP_LONG_HI>;
 
 ////////////////////////////////////////////////////////////////////////////////
-def SRI_N : WInst<"vsri_n", "...I", "PlQPl">;
-def SLI_N : WInst<"vsli_n", "...I", "PlQPl">;
+def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 1>]>;
+def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 1>]>;
 
 // Right shift narrow high
 def SHRN_HIGH_N    : IOpInst<"vshrn_high_n", "<(<q).I",
@@ -924,9 +979,12 @@ def QRSHRN_HIGH_N  : SOpInst<"vqrshrn_high_n", "<(<q).I",
 def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
 let isVCVT_N = 1 in {
-def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">;
+def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl", 
+                        [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd", 
+                        [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd", 
+                        [ImmCheck<1, ImmCheck1_64>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -965,8 +1023,10 @@ let TargetGuard = "aes,neon" in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
-def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">;
-def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">;
+def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl", 
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl", 
+                      [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
                         "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
 def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
@@ -1011,8 +1071,10 @@ def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
   let isLaneQ = 1;
 }
 
-def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd">;
-def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd"> {
+def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd", 
+                        [ImmCheck<3, ImmCheckLaneIndex, 0>]>;
+def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd", 
+                        [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
@@ -1088,8 +1150,10 @@ def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
 }
 
 let isLaneQ = 1 in {
-def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
-def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
+def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
+                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
+def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
+                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
 def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
@@ -1118,7 +1182,8 @@ def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Newly added Vector Extract for f64
-def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">;
+def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl", 
+                    [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Crypto
@@ -1149,7 +1214,7 @@ def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def RAX1 : SInst<"vrax1", "...", "QUl">;
 
 let isVXAR = 1 in {
-def XAR :  SInst<"vxar", "...I", "QUl">;
+def XAR :  SInst<"vxar", "...I", "QUl", [ImmCheck<2, ImmCheck0_63>]>;
 }
 }
 
@@ -1162,10 +1227,10 @@ def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4,neon" in {
 def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
-def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
-def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
-def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi">;
-def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi">;
+def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
 def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
 def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
 }
@@ -1327,49 +1392,68 @@ def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
 // Scalar Shift (Immediate)
 let isScalarShift = 1 in {
 // Signed/Unsigned Shift Right (Immediate)
-def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">;
+def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl", 
+                        [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right (Immediate)
-def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">;
+def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl", 
+                          [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 
 // Signed/Unsigned Shift Right and Accumulate (Immediate)
-def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">;
+def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
-def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">;
+def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 
 // Shift Left (Immediate)
-def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">;
+def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed/Unsigned Saturating Shift Left (Immediate)
-def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl", 
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed Saturating Shift Left Unsigned (Immediate)
-def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">;
+def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl", 
+                      [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 
 // Shift Right And Insert (Immediate)
-def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">;
+def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Shift Left And Insert (Immediate)
-def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">;
+def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl", 
+                        [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 let isScalarNarrowShift = 1 in {
   // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
-  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
   // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
-  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
   // Signed Saturating Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl">;
+  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
   // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl">;
+  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl", 
+                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate)
-def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">;
-def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">;
+def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi", 
+                              [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl", 
+                              [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate)
-def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">;
-def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">;
-def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">;
-def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">;
+def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf", 
+                                [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf", 
+                                [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd", 
+                                [ImmCheck<1, ImmCheck1_64>]>;
+def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd", 
+                                [ImmCheck<1, ImmCheck1_64>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1575,10 +1659,12 @@ def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_L
 def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
-def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">;
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d", 
+                            [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
-def SCALAR_VMUL_LANEQ   : IInst<"vmul_laneq", "..QI", "d"> {
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
+                              [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
   let isLaneQ = 1;
 }
 
@@ -1591,8 +1677,10 @@ def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ
 }
 
 // Scalar Floating Point fused multiply-add (scalar, by element)
-def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">;
-def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd"> {
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
+                            [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1609,14 +1697,18 @@ def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR
 }
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
-def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi"> {
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi",
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
+                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
-def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi"> {
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
+                              [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
+                              [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1646,8 +1738,10 @@ def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR
 }
 } // TargetGuard = "v8.1a"
 
-def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
-def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
+def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+                            [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
   let isLaneQ = 1;
 }
 
@@ -1720,9 +1814,12 @@ let TargetGuard = "fullfp16,neon" in {
 
   // Vector conversion
   let isVCVT_N = 1 in {
-    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">;
-    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">;
-    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">;
+    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs", 
+                          [ImmCheck<1, ImmCheck1_16>]>;
+    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh", 
+                          [ImmCheck<1, ImmCheck1_16>]>;
+    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh", 
+                          [ImmCheck<1, ImmCheck1_16>]>;
   }
 
   // Max/Min
@@ -1770,7 +1867,7 @@ def VZIPH    : WInst<"vzip", "2..", "hQh">;
 def VUZPH    : WInst<"vuzp", "2..", "hQh">;
 def VTRNH    : WInst<"vtrn", "2..", "hQh">;
 // Vector Extract
-def VEXTH      : WInst<"vext", "...I", "hQh">;
+def VEXTH      : WInst<"vext", "...I", "hQh", [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 // Reverse vector elements
 def VREV64H    : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
 
@@ -1801,16 +1898,20 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   // ARMv8.2-A FP16 lane vector intrinsics.
 
   // FMA lane
-  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh">;
-  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh"> {
+  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
+                          [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
     let isLaneQ = 1;
   }
 
   // FMA lane with scalar argument
   def FMLA_NH      : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
   // Scalar floating point fused multiply-add (scalar, by element)
-  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh">;
-  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh"> {
+  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
+                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
     let isLaneQ = 1;
   }
 
@@ -1844,8 +1945,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   }
   def VMULX_NH      : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
   // Scalar floating point  mulx (scalar, by element)
-  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">;
-  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh"> {
+  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
+                                [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
     let isLaneQ = 1;
   }
 
@@ -1865,8 +1968,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def VZIP2H     : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>;
   def VUZP2H     : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>;
 
-  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh">;
-  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh"> {
+  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh", 
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh", 
+                                [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
     let isLaneQ = 1;
   }
 }
@@ -1959,9 +2064,12 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
 
     let isLaneQ = 1 in  {
       // vcmla{ROT}_laneq
+      // ACLE specifies that the fp16 vcmla_#ROT_laneq variant has an immediate range of 0 <= lane <= 1.
+      // fp16 is the only variant for which these two differ.
+      // https://developer.arm.com/documentation/ihi0073/latest/ 
+      defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
       def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
-              (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-
+                (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>;
       // vcmlaq{ROT}_laneq
       def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
              (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
@@ -2011,10 +2119,14 @@ let TargetGuard = "bf16,neon" in {
   def VGET_HIGH_BF : NoTestOpInst<"vget_high", ".Q", "b", OP_HI>;
   def VGET_LOW_BF  : NoTestOpInst<"vget_low", ".Q", "b", OP_LO>;
 
-  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb">;
-  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb">;
-  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb">;
-  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb"> {
+  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb", 
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb", 
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
+                          [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
     let isLaneQ = 1;
   }
 
@@ -2036,14 +2148,22 @@ let TargetGuard = "bf16,neon" in {
   def VST1_X3_BF : WInst<"vst1_x3", "v*(3!)", "bQb">;
   def VST1_X4_BF : WInst<"vst1_x4", "v*(4!)", "bQb">;
 
-  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb">;
-  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb">;
-  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb">;
-  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb">;
-  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb">;
-  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb">;
-  def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb">;
-  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb">;
+  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb", 
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb", 
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb", 
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb", 
+                          [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb", 
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb", 
+                          [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+  def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb",
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb", 
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
   def VLD1_DUP_BF : WInst<"vld1_dup", ".(c*!)", "bQb">;
   def VLD2_DUP_BF : WInst<"vld2_dup", "2(c*!)", "bQb">;
@@ -2093,8 +2213,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
 
 // v8.9a/v9.4a LRCPC3 intrinsics
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3,neon" in {
-  def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
-  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
+  def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl",
+                        [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl", 
+                        [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
 // Lookup table read with 2-bit/4-bit indices
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index 3b8015daee6d90..2b5acd41e7bbd4 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -21,6 +21,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+include "arm_immcheck_incl.td"
+
 // The base Operation class. All operations must subclass this.
 class Operation<list<dag> ops=[]> {
   list<dag> Ops = ops;
@@ -260,7 +262,7 @@ def OP_UNAVAILABLE : Operation {
 
 
 // Every intrinsic subclasses Inst.
-class Inst <string n, string p, string t, Operation o> {
+class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
   string Name = n;
   string Prototype = p;
   string Types = t;
@@ -278,6 +280,7 @@ class Inst <string n, string p, string t, Operation o> {
   // a Q register. Only used for intrinsics which end up calling polymorphic
   // builtins.
   bit isLaneQ = 0;
+  list<ImmCheck> ImmChecks = ch;
 
   // Certain intrinsics have different names than their representative
   // instructions. This field allows us to handle this correctly when we
@@ -300,9 +303,9 @@ class Inst <string n, string p, string t, Operation o> {
 // SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
 // IInst: Instruction with generic integer suffix (e.g., "i8")
 // WInst: Instruction with only bit size suffix (e.g., "8")
-class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
+class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
 
 // The following instruction classes are implemented via operators
 // instead of builtins. As such these declarations are only used for
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 6ec357825a132a..fdf4ba55fe9382 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -13,6 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+include "arm_immcheck_incl.td"
+
 //===----------------------------------------------------------------------===//
 // Instruction definitions
 //===----------------------------------------------------------------------===//
@@ -233,40 +235,6 @@ def IsInZT0                         : FlagType<0x400000000000>;
 def IsOutZT0                        : FlagType<0x800000000000>;
 def IsInOutZT0                      : FlagType<0x1000000000000>;
 
-// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
-class ImmCheckType<int val> {
-  int Value = val;
-}
-def ImmCheck0_31                : ImmCheckType<0>;  // 0..31 (used for e.g. predicate patterns)
-def ImmCheck1_16                : ImmCheckType<1>;  // 1..16
-def ImmCheckExtract             : ImmCheckType<2>;  // 0..(2048/sizeinbits(elt) - 1)
-def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
-def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
-def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
-def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
-def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<8>;  // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot        : ImmCheckType<9>;  // 0..(128/(4*sizeinbits(elt)) - 1)
-def ImmCheckComplexRot90_270    : ImmCheckType<10>; // [90,270]
-def ImmCheckComplexRotAll90     : ImmCheckType<11>; // [0, 90, 180,270]
-def ImmCheck0_13                : ImmCheckType<12>; // 0..13
-def ImmCheck0_1                 : ImmCheckType<13>; // 0..1
-def ImmCheck0_2                 : ImmCheckType<14>; // 0..2
-def ImmCheck0_3                 : ImmCheckType<15>; // 0..3
-def ImmCheck0_0                 : ImmCheckType<16>; // 0..0
-def ImmCheck0_15                : ImmCheckType<17>; // 0..15
-def ImmCheck0_255               : ImmCheckType<18>; // 0..255
-def ImmCheck2_4_Mul2            : ImmCheckType<19>; // 2, 4
-def ImmCheck1_1                 : ImmCheckType<20>; // 1..1
-def ImmCheck1_3                 : ImmCheckType<21>; // 1..3
-def ImmCheck1_7                 : ImmCheckType<22>; // 1..7
-
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
-  int Arg = arg;
-  int EltSizeArg = eltSizeArg;
-  ImmCheckType Kind = kind;
-}
-
 defvar InvalidMode = "";
 
 class Inst<string n, string p, string t, MergeType mt, string i,
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index b8196a3170d63c..e1cc36f0644e8d 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -44,6 +44,9 @@ class SemaARM : public SemaBase {
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
+  bool ParseNeonImmChecks(CallExpr *TheCall, 
+                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks, 
+                          int OverloadType);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 185e0427d5c995..f0d943c6cc2f55 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -315,40 +315,6 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
   return false;
 }
 
-// Get the valid immediate range for the specified NEON type code.
-static unsigned RFT(unsigned t, bool shift = false, bool ForceQuad = false) {
-  NeonTypeFlags Type(t);
-  int IsQuad = ForceQuad ? true : Type.isQuad();
-  switch (Type.getEltType()) {
-  case NeonTypeFlags::Int8:
-  case NeonTypeFlags::Poly8:
-    return shift ? 7 : (8 << IsQuad) - 1;
-  case NeonTypeFlags::Int16:
-  case NeonTypeFlags::Poly16:
-    return shift ? 15 : (4 << IsQuad) - 1;
-  case NeonTypeFlags::Int32:
-    return shift ? 31 : (2 << IsQuad) - 1;
-  case NeonTypeFlags::Int64:
-  case NeonTypeFlags::Poly64:
-    return shift ? 63 : (1 << IsQuad) - 1;
-  case NeonTypeFlags::Poly128:
-    return shift ? 127 : (1 << IsQuad) - 1;
-  case NeonTypeFlags::Float16:
-    assert(!shift && "cannot shift float types!");
-    return (4 << IsQuad) - 1;
-  case NeonTypeFlags::Float32:
-    assert(!shift && "cannot shift float types!");
-    return (2 << IsQuad) - 1;
-  case NeonTypeFlags::Float64:
-    assert(!shift && "cannot shift float types!");
-    return (1 << IsQuad) - 1;
-  case NeonTypeFlags::BFloat16:
-    assert(!shift && "cannot shift float types!");
-    return (4 << IsQuad) - 1;
-  }
-  llvm_unreachable("Invalid NeonTypeFlag!");
-}
-
 /// getNeonEltType - Return the QualType corresponding to the elements of
 /// the vector type specified by the NeonTypeFlags.  This is used to check
 /// the pointer arguments for Neon load/store intrinsics.
@@ -404,6 +370,62 @@ enum ArmSMEState : unsigned {
   ArmZT0Mask = 0b11 << 2
 };
 
+/// Range-check the immediate operands of a NEON builtin call. Each entry of
+/// ImmChecks is (argument index, ArmImmCheckType, NeonTypeFlags value); an
+/// OverloadType other than -1 replaces the per-entry type flags.
+/// Returns true if any operand failed its check.
+bool SemaARM::ParseNeonImmChecks(
+    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+    int OverloadType = -1) {
+  bool HasError = false;
+
+  for (const auto &[ArgIdx, CheckTy, ElementType] : ImmChecks) {
+    NeonTypeFlags Type(OverloadType != -1 ? OverloadType : ElementType);
+    int Low = 0, High = 0;
+    switch (static_cast<ArmImmCheckType>(CheckTy)) {
+    case ArmImmCheckType::ImmCheck0_3:
+      High = 3;
+      break;
+    case ArmImmCheckType::ImmCheck0_63:
+      High = 63;
+      break;
+    case ArmImmCheckType::ImmCheck0_7:
+      High = 7;
+      break;
+    case ArmImmCheckType::ImmCheck1_16:
+      Low = 1;
+      High = 16;
+      break;
+    case ArmImmCheckType::ImmCheck1_32:
+      Low = 1;
+      High = 32;
+      break;
+    case ArmImmCheckType::ImmCheck1_64:
+      Low = 1;
+      High = 64;
+      break;
+    case ArmImmCheckType::ImmCheckLaneIndex:
+      // Lanes of a 64-bit vector, or of a 128-bit one for quad types.
+      High = (64 << Type.isQuad()) / Type.getEltSizeInBits() - 1;
+      break;
+    case ArmImmCheckType::ImmCheckLaneQIndex:
+      // The indexed vector is always treated as 128 bits wide.
+      High = (128 / Type.getEltSizeInBits()) - 1;
+      break;
+    case ArmImmCheckType::ImmCheckShiftLeft:
+      High = Type.getEltSizeInBits() - 1;
+      break;
+    case ArmImmCheckType::ImmCheckShiftRight:
+      Low = 1;
+      High = Type.getEltSizeInBits();
+      break;
+    default:
+      llvm_unreachable("Invalid Neon immediate range typeflag!");
+    }
+    HasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, Low, High);
+  }
+  return HasError;
+}
+
 bool SemaARM::ParseSVEImmChecks(
     CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
   // Perform all the immediate checks for this builtin call.
@@ -433,76 +455,76 @@ bool SemaARM::ParseSVEImmChecks(
       return false;
     };
 
-    switch ((SVETypeFlags::ImmCheckType)CheckTy) {
-    case SVETypeFlags::ImmCheck0_31:
+    switch ((ArmImmCheckType)CheckTy) {
+    case ArmImmCheckType::ImmCheck0_31:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_13:
+    case ArmImmCheckType::ImmCheck0_13:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_16:
+    case ArmImmCheckType::ImmCheck1_16:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_7:
+    case ArmImmCheckType::ImmCheck0_7:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_1:
+    case ArmImmCheckType::ImmCheck1_1:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_3:
+    case ArmImmCheckType::ImmCheck1_3:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck1_7:
+    case ArmImmCheckType::ImmCheck1_7:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckExtract:
+    case ArmImmCheckType::ImmCheckExtract:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (2048 / ElementSizeInBits) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckShiftRight:
+    case ArmImmCheckType::ImmCheckShiftRight:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
                                           ElementSizeInBits))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckShiftRightNarrow:
+    case ArmImmCheckType::ImmCheckShiftRightNarrow:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
                                           ElementSizeInBits / 2))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckShiftLeft:
+    case ArmImmCheckType::ImmCheckShiftLeft:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           ElementSizeInBits - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckLaneIndex:
+    case ArmImmCheckType::ImmCheckLaneIndex:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (128 / (1 * ElementSizeInBits)) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+    case ArmImmCheckType::ImmCheckLaneIndexCompRotate:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (128 / (2 * ElementSizeInBits)) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckLaneIndexDot:
+    case ArmImmCheckType::ImmCheckLaneIndexDot:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
                                           (128 / (4 * ElementSizeInBits)) - 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckComplexRot90_270:
+    case ArmImmCheckType::ImmCheckComplexRot90_270:
       if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
                               diag::err_rotation_argument_to_cadd))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheckComplexRotAll90:
+    case ArmImmCheckType::ImmCheckComplexRotAll90:
       if (CheckImmediateInSet(
               [](int64_t V) {
                 return V == 0 || V == 90 || V == 180 || V == 270;
@@ -510,35 +532,38 @@ bool SemaARM::ParseSVEImmChecks(
               diag::err_rotation_argument_to_cmla))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_1:
+    case ArmImmCheckType::ImmCheck0_1:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_2:
+    case ArmImmCheckType::ImmCheck0_2:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_3:
+    case ArmImmCheckType::ImmCheck0_3:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_0:
+    case ArmImmCheckType::ImmCheck0_0:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_15:
+    case ArmImmCheckType::ImmCheck0_15:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck0_255:
+    case ArmImmCheckType::ImmCheck0_255:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
         HasError = true;
       break;
-    case SVETypeFlags::ImmCheck2_4_Mul2:
+    case ArmImmCheckType::ImmCheck2_4_Mul2:
       if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
           SemaRef.BuiltinConstantArgMultiple(TheCall, ArgNum, 2))
         HasError = true;
       break;
+    default:
+      llvm_unreachable("Invalid SVE immediate range typeflag!");
+      break;
     }
   }
 
@@ -749,7 +774,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   llvm::APSInt Result;
   uint64_t mask = 0;
-  unsigned TV = 0;
+  int TV = -1;
   int PtrArgNum = -1;
   bool HasConstPtr = false;
   switch (BuiltinID) {
@@ -802,7 +827,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   // For NEON intrinsics which take an immediate value as part of the
   // instruction, range check them here.
-  unsigned i = 0, l = 0, u = 0;
+  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
   switch (BuiltinID) {
   default:
     return false;
@@ -810,9 +835,9 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 #include "clang/Basic/arm_fp16.inc"
 #include "clang/Basic/arm_neon.inc"
 #undef GET_NEON_IMMEDIATE_CHECK
-  }
-
-  return SemaRef.BuiltinConstantArgRange(TheCall, i, l, u + l);
+    }
+    
+  return ParseNeonImmChecks(TheCall, ImmChecks, TV);
 }
 
 bool SemaARM::CheckMVEBuiltinFunctionCall(unsigned BuiltinID,
diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index 02171527cc6a32..2ff48fd97b4271 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -155,15 +155,14 @@ float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rh
   return vcmla_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK:  %vcmla_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> 
 float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_laneq_f16(acc, lhs, rhs, 1);
 }
 
 // CHECK-LABEL: @test_vcmlaq_lane_f16(
@@ -191,7 +190,6 @@ float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rh
   return vcmla_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -229,15 +227,14 @@ float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x
   return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
-// CHECK-LABEL: @test_vcmla_rot90_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: %vcmla_rot90_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK: ret <4 x half> %vcmla_rot90_f163.i
 float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
 }
 
 // CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
@@ -265,7 +262,6 @@ float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x
   return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -303,15 +299,15 @@ float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16
   return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK:  %vcmla_rot180_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK:  ret <4 x half> %vcmla_rot180_f163.i
 float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
 }
 
 // CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
@@ -339,7 +335,6 @@ float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32
   return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -377,15 +372,15 @@ float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16
   return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK:  %vcmla_rot270_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK:  ret <4 x half> %vcmla_rot270_f163.i
 float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3);
+  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
 }
 
 // CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
@@ -413,7 +408,6 @@ float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32
   return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
 // CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
 // CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
 // CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
diff --git a/clang/test/Sema/aarch64-neon-vcmla-ranges.c b/clang/test/Sema/aarch64-neon-vcmla-ranges.c
new file mode 100644
index 00000000000000..9b42e68670da08
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-vcmla-ranges.c
@@ -0,0 +1,202 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify %s
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+
+void test_vcmla_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_lane_f16(a, b, c, 0);
+  vcmla_lane_f16(a, b, c, 1);
+
+  vcmla_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_laneq_f16(a, b, c, 0);
+  vcmla_laneq_f16(a, b, c, 1);
+
+  vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmla_laneq_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+}
+
+void test_vcmlaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c){
+  vcmlaq_lane_f16(a, b, c, 0);
+  vcmlaq_lane_f16(a, b, c, 1);
+
+  vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_lane_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+}
+
+void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_laneq_f16(a, b, c, 0);
+  vcmlaq_laneq_f16(a, b, c, 1);
+  vcmlaq_laneq_f16(a, b, c, 3);
+
+  vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+}
+
+void test_vcmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_lane_f32(a, b, c, 0);
+
+  vcmla_lane_f32(a, b, c, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_lane_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_laneq_f32(a, b, c, 0);
+
+  vcmla_laneq_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_laneq_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_laneq_f32(a, b, c, 0);
+  vcmlaq_laneq_f32(a, b, c, 1);
+
+  vcmlaq_laneq_f32(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_laneq_f32(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot90_lane_f16(a, b, c, 0);
+  vcmla_rot90_lane_f16(a, b, c, 1);
+
+  vcmla_rot90_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot90_laneq_f16(a, b, c, 0);
+  vcmla_rot90_laneq_f16(a, b, c, 1);
+
+  vcmla_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot90_laneq_f16(a, b, c, 0);
+  vcmlaq_rot90_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot90_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot180_lane_f16(a, b, c, 0);
+  vcmla_rot180_lane_f16(a, b, c, 1);
+
+  vcmla_rot180_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot180_laneq_f16(a, b, c, 0);
+  vcmla_rot180_laneq_f16(a, b, c, 1);
+
+  vcmla_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot180_laneq_f16(a, b, c, 0);
+  vcmlaq_rot180_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot180_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+  vcmla_rot270_lane_f16(a, b, c, 0);
+  vcmla_rot270_lane_f16(a, b, c, 1);
+
+  vcmla_rot270_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+  vcmla_rot270_laneq_f16(a, b, c, 0);
+  vcmla_rot270_laneq_f16(a, b, c, 1);
+
+  vcmla_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+  vcmlaq_rot270_laneq_f16(a, b, c, 0);
+  vcmlaq_rot270_laneq_f16(a, b, c, 3);
+
+  vcmlaq_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot270_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot90_lane_f32(a, b, c, 0);
+
+  vcmla_rot90_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot90_laneq_f32(a, b, c, 0);
+  vcmla_rot90_laneq_f32(a, b, c, 1);
+
+  vcmla_rot90_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot90_laneq_f32(a, b, c, 0);
+  vcmlaq_rot90_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot90_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot90_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot180_lane_f32(a, b, c, 0);
+
+  vcmla_rot180_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot180_laneq_f32(a, b, c, 0);
+  vcmla_rot180_laneq_f32(a, b, c, 1);
+
+  vcmla_rot180_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot90_laneq_f32(a, b, c, 0);
+  vcmlaq_rot90_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot90_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot90_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+  vcmla_rot270_lane_f32(a, b, c, 0);
+
+  vcmla_rot270_lane_f32(a, b, c, 1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_lane_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+  vcmla_rot270_laneq_f32(a, b, c, 0);
+  vcmla_rot270_laneq_f32(a, b, c, 1);
+
+  vcmla_rot270_laneq_f32(a, b, c, 2);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f32(a, b, c, -1);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot270_laneq_f32(a, b, c, 0);
+  vcmlaq_rot270_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot270_laneq_f32(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot270_laneq_f32(a, b, c, -1);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
\ No newline at end of file
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 8ec8e67388bbd2..a7058e3461f94d 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -333,6 +333,8 @@ class Intrinsic {
 
   /// The types of return value [0] and parameters [1..].
   std::vector<Type> Types;
+
+  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
   /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
   int PolymorphicKeyType;
   /// The local variables defined.
@@ -368,9 +370,9 @@ class Intrinsic {
 
 public:
   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
-            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
             StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
-      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
+      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks), CK(CK), Body(Body),
         ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
         BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
         UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
@@ -414,23 +416,22 @@ class Intrinsic {
   /// Get the architectural guard string (#ifdef).
   std::string getArchGuard() const { return ArchGuard; }
   std::string getTargetGuard() const { return TargetGuard; }
+  ArrayRef<std::tuple<int, int, int>> getImmChecks() const {return ImmChecks; }
   /// Get the non-mangled name.
   std::string getName() const { return Name; }
 
   /// Return true if the intrinsic takes an immediate operand.
   bool hasImmediate() const {
     return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
+    //return !ImmChecks.empty();
   }
 
-  /// Return the parameter index of the immediate operand.
-  unsigned getImmediateIdx() const {
-    for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
-      if (Types[Idx].isImmediate())
-        return Idx - 1;
-    llvm_unreachable("Intrinsic has no immediate");
+  // Return true if the parameter at the given index is an immediate.
+  bool isArgImmediate(unsigned idx) const {
+    assert((idx + 1) < Types.size() && "Argument type index out of range!");
+    return Types[idx + 1].isImmediate();
   }
 
-
   unsigned getNumParams() const { return Types.size() - 1; }
   Type getReturnType() const { return Types[0]; }
   Type getParamType(unsigned I) const { return Types[I + 1]; }
@@ -554,9 +555,9 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
+  void genNeonImmCheckTypes(raw_ostream &OS);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
-
 public:
   /// Called by Intrinsic - this attempts to get an intrinsic that takes
   /// the given types as arguments.
@@ -1031,7 +1032,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
     if (LocalCK == ClassI && T.isInteger())
       T.makeSigned();
 
-    if (hasImmediate() && getImmediateIdx() == I)
+    if(isArgImmediate(I))
       T.makeImmediate(32);
 
     S += T.builtin_str();
@@ -1953,6 +1954,16 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
+  std::vector<Record*> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+
+  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
+  for(const auto *R: ImmCheckList) {
+
+    ImmChecks.push_back(std::make_tuple(R->getValueAsInt("Arg"), 
+                        R->getValueAsDef("Kind")->getValueAsInt("Value"),
+                        R->getValueAsInt("EltSizeArg")));
+  }
+
   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
 
@@ -1993,7 +2004,7 @@ void NeonEmitter::createIntrinsic(Record *R,
   auto &Entry = IntrinsicMap[Name];
 
   for (auto &I : NewTypeSpecs) {
-    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
+    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body, *this,
                        ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
     Out.push_back(&Entry.back());
   }
@@ -2143,84 +2154,40 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
-void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
-                                        SmallVectorImpl<Intrinsic *> &Defs) {
-  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
+  OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
+
+  for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
+    OS << "  " << RV->getNameInitAsString() << " = " << RV->getValueAsInt("Value") << ",\n";
+  }
 
+  OS << "#endif\n\n";
+}
+
+void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
+  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+  int EltType;
+  // Ensure these are only emitted once.
   std::set<std::string> Emitted;
 
-  for (auto *Def : Defs) {
-    if (Def->hasBody())
-      continue;
-    // Functions which do not have an immediate do not need to have range
-    // checking code emitted.
-    if (!Def->hasImmediate())
-      continue;
-    if (Emitted.find(Def->getMangledName()) != Emitted.end())
+  for (auto &Def : Defs) {
+    if (Emitted.find(Def->getMangledName()) != Emitted.end() || !Def->hasImmediate())
       continue;
 
-    std::string LowerBound, UpperBound;
-
-    Record *R = Def->getRecord();
-    if (R->getValueAsBit("isVXAR")) {
-      //VXAR takes an immediate in the range [0, 63]
-      LowerBound = "0";
-      UpperBound = "63";
-    } else if (R->getValueAsBit("isVCVT_N")) {
-      // VCVT between floating- and fixed-point values takes an immediate
-      // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
-      LowerBound = "1";
-	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
-		  Def->getName().find('h') != std::string::npos)
-		// VCVTh operating on FP16 intrinsics in range [1, 16)
-		UpperBound = "15";
-	  else if (Def->getBaseType().getElementSizeInBits() == 32)
-        UpperBound = "31";
-	  else
-        UpperBound = "63";
-    } else if (R->getValueAsBit("isScalarShift")) {
-      // Right shifts have an 'r' in the name, left shifts do not. Convert
-      // instructions have the same bounds and right shifts.
-      if (Def->getName().find('r') != std::string::npos ||
-          Def->getName().find("cvt") != std::string::npos)
-        LowerBound = "1";
-
-      UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
-    } else if (R->getValueAsBit("isShift")) {
-      // Builtins which are overloaded by type will need to have their upper
-      // bound computed at Sema time based on the type constant.
-
-      // Right shifts have an 'r' in the name, left shifts do not.
-      if (Def->getName().find('r') != std::string::npos)
-        LowerBound = "1";
-      UpperBound = "RFT(TV, true)";
-    } else if (Def->getClassKind(true) == ClassB) {
-      // ClassB intrinsics have a type (and hence lane number) that is only
-      // known at runtime.
-      if (R->getValueAsBit("isLaneQ"))
-        UpperBound = "RFT(TV, false, true)";
-      else
-        UpperBound = "RFT(TV, false, false)";
-    } else {
-      // The immediate generally refers to a lane in the preceding argument.
-      assert(Def->getImmediateIdx() > 0);
-      Type T = Def->getParamType(Def->getImmediateIdx() - 1);
-      UpperBound = utostr(T.getNumElements() - 1);
-    }
+    // If the Def has a body (operation DAGs), it is not a __builtin_neon_ builtin.
+    if(Def->hasBody()) continue;
 
-    // Calculate the index of the immediate that should be range checked.
-    unsigned Idx = Def->getNumParams();
-    if (Def->hasImmediate())
-      Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
-
-    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
-       << "i = " << Idx << ";";
-    if (!LowerBound.empty())
-      OS << " l = " << LowerBound << ";";
-    if (!UpperBound.empty())
-      OS << " u = " << UpperBound << ";";
-    OS << " break;\n";
+    OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
+    
+    for(const auto &Check: Def->getImmChecks()){
+      EltType = std::get<2>(Check);   // elt type argument
+      if(EltType >= 0)
+        EltType = Def->getParamType(EltType).getNeonEnum();
 
+      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check) << 
+                ", " << std::get<1>(Check) <<  ", " << EltType << ")); \n";
+      OS << "  break;\n";
+    }
     Emitted.insert(Def->getMangledName());
   }
 
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index ca63bd354bfc77..8a79d0b0f50121 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1573,7 +1573,7 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) {
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";
 
-  OS << "#ifdef LLVM_GET_SVE_IMMCHECKTYPES\n";
+  OS << "#ifdef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES\n";
   for (auto &KV : ImmCheckTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";

>From f67eacb7826b5a286495e2baa4430070de655c63 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 23 Jul 2024 22:28:48 +0000
Subject: [PATCH 02/17] Updated/consistent vcmla codegen tests

---
 clang/test/CodeGen/aarch64-neon-vcmla.c | 610 +++++++++++++++---------
 1 file changed, 384 insertions(+), 226 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index 2ff48fd97b4271..d82d74d019c012 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -1,438 +1,596 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon \
-// RUN:        -target-feature +v8.3a \
-// RUN:        -target-feature +fullfp16 \
-// RUN:        -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon  \
+// RUN:            -target-feature +v8.3a -target-feature +fullfp16 \
+// RUN:            -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
 
 // REQUIRES: aarch64-registered-target
 
 #include <arm_neon.h>
 
-// CHECK-LABEL: @test_vcmla_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
+//
 float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
+//
 float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
+//
 float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
+//
 float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_F643_I]]
+//
 float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
 float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
 float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
 float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
 float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT90_F643_I]]
+//
 float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot90_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
 float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
 float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
 float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
 float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT180_F643_I]]
+//
 float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot180_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
 float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
 float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
 float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
 float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT270_F643_I]]
+//
 float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot270_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: @test_vcmla_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
+//
 float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_laneq_f16(
-// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK:  %vcmla_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> 
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
+//
 float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_laneq_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
+//
 float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
+//
 float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
+//
 float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
+//
 float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
+//
 float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
+//
 float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
 float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: %vcmla_rot90_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK: ret <4 x half> %vcmla_rot90_f163.i
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
 float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
 float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
 float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
 float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
 float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
 float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
 float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
 float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
-// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK:  %vcmla_rot180_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK:  ret <4 x half> %vcmla_rot180_f163.i
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
 float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
 float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
 float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
 float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
 float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
 float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
 float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
 float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
-// CHECK:  %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK:  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK:  %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK:  %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK:  %vcmla_rot270_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK:  ret <4 x half> %vcmla_rot270_f163.
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
 float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
   return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
 float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
 float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
 float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
 float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[DUP]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
 float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
 float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1);
 }

>From fe21f32cca597b2c811b883687940df69cb24149 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 24 Jul 2024 11:33:27 +0000
Subject: [PATCH 03/17] Run clang-format

---
 clang/include/clang/Basic/TargetBuiltins.h | 51 ++++++------
 clang/include/clang/Sema/SemaARM.h         |  4 +-
 clang/lib/Sema/SemaARM.cpp                 | 93 +++++++++++-----------
 clang/utils/TableGen/NeonEmitter.cpp       | 54 ++++++++-----
 4 files changed, 106 insertions(+), 96 deletions(-)

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 50e17ad7e16284..384811f9281ac6 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -209,7 +209,7 @@ namespace clang {
         Flags |= QuadFlag;
     }
 
-    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); } 
+    EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
     bool isPoly() const {
       EltType ET = getEltType();
       return ET == Poly8 || ET == Poly16 || ET == Poly64;
@@ -217,36 +217,36 @@ namespace clang {
     bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
     bool isQuad() const { return (Flags & QuadFlag) != 0; };
     unsigned getEltSizeInBits() const {
-      switch(getEltType()){
-        case Int8:
-        case Poly8:
-          return 8;
-        case Int16:
-        case Float16:
-        case Poly16:
-        case BFloat16:
-          return 16;
-        case Int32:
-        case Float32:
-          return 32;
-        case Int64:
-        case Float64:
-        case Poly64:
-          return 64;
-        case Poly128:
-          return 128;
-        default:
-          llvm_unreachable("Invalid NeonTypeFlag!");
+      switch (getEltType()) {
+      case Int8:
+      case Poly8:
+        return 8;
+      case Int16:
+      case Float16:
+      case Poly16:
+      case BFloat16:
+        return 16;
+      case Int32:
+      case Float32:
+        return 32;
+      case Int64:
+      case Float64:
+      case Poly64:
+        return 64;
+      case Poly128:
+        return 128;
+      default:
+        llvm_unreachable("Invalid NeonTypeFlag!");
       }
     }
   };
 
-    // Shared between SVE/SME and NEON
-    enum ArmImmCheckType {
+  // Shared between SVE/SME and NEON
+  enum ArmImmCheckType {
 #define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
 #include "clang/Basic/arm_sve_typeflags.inc"
-#undef  LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
-    };
+#undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+  };
 
   /// Flags to identify the types for overloaded SVE builtins.
   class SVETypeFlags {
@@ -279,7 +279,6 @@ namespace clang {
 #undef LLVM_GET_SVE_MERGETYPES
     };
 
-
     SVETypeFlags(uint64_t F) : Flags(F) {
       EltTypeShift = llvm::countr_zero(EltTypeMask);
       MemEltTypeShift = llvm::countr_zero(MemEltTypeMask);
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index e1cc36f0644e8d..4fdeecf2ac6949 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -44,8 +44,8 @@ class SemaARM : public SemaBase {
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
-  bool ParseNeonImmChecks(CallExpr *TheCall, 
-                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks, 
+  bool ParseNeonImmChecks(CallExpr *TheCall,
+                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
                           int OverloadType);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index f0d943c6cc2f55..aa19e5f82aa15f 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -370,62 +370,63 @@ enum ArmSMEState : unsigned {
   ArmZT0Mask = 0b11 << 2
 };
 
-bool SemaARM::ParseNeonImmChecks(CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> 
-                                &ImmChecks, int OverloadType = -1) {
+bool SemaARM::ParseNeonImmChecks(
+    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+    int OverloadType = -1) {
   int ArgIdx, CheckTy, ElementType;
   bool hasError = false;
 
   for (auto &I : ImmChecks) {
     std::tie(ArgIdx, CheckTy, ElementType) = I;
 
-    NeonTypeFlags Type = (OverloadType != -1) ? 
-                          NeonTypeFlags(OverloadType) : NeonTypeFlags(ElementType); 
-          
-    switch((ArmImmCheckType)CheckTy) {
-      case ArmImmCheckType::ImmCheck0_3:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
-        break;
-      case ArmImmCheckType::ImmCheck0_63:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
-        break;
-      case ArmImmCheckType::ImmCheck0_7:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
-        break;
-      case ArmImmCheckType::ImmCheck1_16:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
-        break;
-      case ArmImmCheckType::ImmCheck1_32:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
-        break;
-      case ArmImmCheckType::ImmCheck1_64:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
-        break;
-      case ArmImmCheckType::ImmCheckLaneIndex:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,  (64 << Type.isQuad()) / 
-                                                    Type.getEltSizeInBits() - 1);
-        break; 
-      case ArmImmCheckType::ImmCheckLaneQIndex:    // force to use quad
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 
-                                                    (128/Type.getEltSizeInBits()) - 1);
-        break;
-      case ArmImmCheckType::ImmCheckShiftLeft:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 
-                                                    Type.getEltSizeInBits() - 1);
-        break;
-      case ArmImmCheckType::ImmCheckShiftRight:
-        hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 
-                                                    1, Type.getEltSizeInBits());
-        break;
-      default:
-        llvm_unreachable("Invalid Neon immediate range typeflag!");
-        break;
+    NeonTypeFlags Type = (OverloadType != -1) ? NeonTypeFlags(OverloadType)
+                                              : NeonTypeFlags(ElementType);
+
+    switch ((ArmImmCheckType)CheckTy) {
+    case ArmImmCheckType::ImmCheck0_3:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
+      break;
+    case ArmImmCheckType::ImmCheck0_63:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
+      break;
+    case ArmImmCheckType::ImmCheck0_7:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
+      break;
+    case ArmImmCheckType::ImmCheck1_16:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
+      break;
+    case ArmImmCheckType::ImmCheck1_32:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
+      break;
+    case ArmImmCheckType::ImmCheck1_64:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
+      break;
+    case ArmImmCheckType::ImmCheckLaneIndex:
+      hasError |= SemaRef.BuiltinConstantArgRange(
+          TheCall, ArgIdx, 0,
+          (64 << Type.isQuad()) / Type.getEltSizeInBits() - 1);
+      break;
+    case ArmImmCheckType::ImmCheckLaneQIndex: // force to use quad
+      hasError |= SemaRef.BuiltinConstantArgRange(
+          TheCall, ArgIdx, 0, (128 / Type.getEltSizeInBits()) - 1);
+      break;
+    case ArmImmCheckType::ImmCheckShiftLeft:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                                  Type.getEltSizeInBits() - 1);
+      break;
+    case ArmImmCheckType::ImmCheckShiftRight:
+      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1,
+                                                  Type.getEltSizeInBits());
+      break;
+    default:
+      llvm_unreachable("Invalid Neon immediate range typeflag!");
+      break;
     }
   }
 
   return hasError;
 }
 
-
 bool SemaARM::ParseSVEImmChecks(
     CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
   // Perform all the immediate checks for this builtin call.
@@ -835,8 +836,8 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 #include "clang/Basic/arm_fp16.inc"
 #include "clang/Basic/arm_neon.inc"
 #undef GET_NEON_IMMEDIATE_CHECK
-    }
-    
+  }
+
   return ParseNeonImmChecks(TheCall, ImmChecks, TV);
 }
 
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index a7058e3461f94d..bd7e94937d350c 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -370,10 +370,13 @@ class Intrinsic {
 
 public:
   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
-            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
-            StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
-      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks), CK(CK), Body(Body),
-        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
+            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks,
+            ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+            StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable,
+            bool BigEndianSafe)
+      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks),
+        CK(CK), Body(Body), ArchGuard(ArchGuard.str()),
+        TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
         BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
         UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
         Emitter(Emitter) {
@@ -416,14 +419,14 @@ class Intrinsic {
   /// Get the architectural guard string (#ifdef).
   std::string getArchGuard() const { return ArchGuard; }
   std::string getTargetGuard() const { return TargetGuard; }
-  ArrayRef<std::tuple<int, int, int>> getImmChecks() const {return ImmChecks; }
+  ArrayRef<std::tuple<int, int, int>> getImmChecks() const { return ImmChecks; }
   /// Get the non-mangled name.
   std::string getName() const { return Name; }
 
   /// Return true if the intrinsic takes an immediate operand.
   bool hasImmediate() const {
     return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
-    //return !ImmChecks.empty();
+    // return !ImmChecks.empty();
   }
 
   // Return if the supplied argument is an immediate
@@ -558,6 +561,7 @@ class NeonEmitter {
   void genNeonImmCheckTypes(raw_ostream &OS);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
+
 public:
   /// Called by Intrinsic - this attempts to get an intrinsic that takes
   /// the given types as arguments.
@@ -1032,7 +1036,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
     if (LocalCK == ClassI && T.isInteger())
       T.makeSigned();
 
-    if(isArgImmediate(I))
+    if (isArgImmediate(I))
       T.makeImmediate(32);
 
     S += T.builtin_str();
@@ -1954,12 +1958,13 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
-  std::vector<Record*> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+  std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
 
   SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
-  for(const auto *R: ImmCheckList) {
+  for (const auto *R : ImmCheckList) {
 
-    ImmChecks.push_back(std::make_tuple(R->getValueAsInt("Arg"), 
+    ImmChecks.push_back(
+        std::make_tuple(R->getValueAsInt("Arg"),
                         R->getValueAsDef("Kind")->getValueAsInt("Value"),
                         R->getValueAsInt("EltSizeArg")));
   }
@@ -2004,8 +2009,9 @@ void NeonEmitter::createIntrinsic(Record *R,
   auto &Entry = IntrinsicMap[Name];
 
   for (auto &I : NewTypeSpecs) {
-    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body, *this,
-                       ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
+    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body,
+                       *this, ArchGuard, TargetGuard, IsUnavailable,
+                       BigEndianSafe);
     Out.push_back(&Entry.back());
   }
 
@@ -2158,34 +2164,38 @@ void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
   OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
 
   for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
-    OS << "  " << RV->getNameInitAsString() << " = " << RV->getValueAsInt("Value") << ",\n";
+    OS << "  " << RV->getNameInitAsString() << " = "
+       << RV->getValueAsInt("Value") << ",\n";
   }
 
   OS << "#endif\n\n";
 }
 
-void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
+void NeonEmitter::genIntrinsicRangeCheckCode(
+    raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   int EltType;
   // Ensure these are only emitted once.
   std::set<std::string> Emitted;
 
   for (auto &Def : Defs) {
-    if (Emitted.find(Def->getMangledName()) != Emitted.end() || !Def->hasImmediate())
+    if (Emitted.find(Def->getMangledName()) != Emitted.end() ||
+        !Def->hasImmediate())
       continue;
 
     // If the Def has a body (operation DAGs), it is not a __builtin_neon_
-    if(Def->hasBody()) continue;
+    if (Def->hasBody())
+      continue;
 
     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
-    
-    for(const auto &Check: Def->getImmChecks()){
-      EltType = std::get<2>(Check);   // elt type argument
-      if(EltType >= 0)
+
+    for (const auto &Check : Def->getImmChecks()) {
+      EltType = std::get<2>(Check); // elt type argument
+      if (EltType >= 0)
         EltType = Def->getParamType(EltType).getNeonEnum();
 
-      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check) << 
-                ", " << std::get<1>(Check) <<  ", " << EltType << ")); \n";
+      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check)
+         << ", " << std::get<1>(Check) << ", " << EltType << ")); \n";
       OS << "  break;\n";
     }
     Emitted.insert(Def->getMangledName());

>From 810d76568e4261a54ec4a5e5548521172af6a4a0 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Mon, 29 Jul 2024 11:47:31 +0000
Subject: [PATCH 04/17] Move immediate check types to own generated file

---
 clang/include/clang/Basic/CMakeLists.txt   | 3 +++
 clang/include/clang/Basic/TargetBuiltins.h | 2 +-
 clang/utils/TableGen/SveEmitter.cpp        | 9 +++++++++
 clang/utils/TableGen/TableGen.cpp          | 7 +++++++
 clang/utils/TableGen/TableGenBackends.h    | 1 +
 llvm/docs/CommandGuide/tblgen.rst          | 4 ++++
 6 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 2ef6ddc68f4bf3..f069f4fc118f27 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -67,6 +67,9 @@ clang_tablegen(arm_neon.inc -gen-arm-neon-sema
 clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
   SOURCE arm_fp16.td
   TARGET ClangARMFP16)
+clang_tablegen(arm_immcheck_types.inc -gen-arm-immcheck-types
+  SOURCE arm_sve.td
+  TARGET ClangARMImmChecks)
 clang_tablegen(arm_mve_builtins.inc -gen-arm-mve-builtin-def
   SOURCE arm_mve.td
   TARGET ClangARMMveBuiltinsDef)
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 384811f9281ac6..15757e43f4a5e5 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -244,7 +244,7 @@ namespace clang {
   // Shared between SVE/SME and NEON
   enum ArmImmCheckType {
 #define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
-#include "clang/Basic/arm_sve_typeflags.inc"
+#include "clang/Basic/arm_immcheck_types.inc"
 #undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
   };
 
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 8a79d0b0f50121..ccf687959ab6fb 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -388,6 +388,9 @@ class SVEEmitter {
   /// Emit all the range checks for the immediates.
   void createRangeChecks(raw_ostream &o);
 
+  // Emit all the ImmCheckTypes to arm_immcheck_types.h
+  void createImmCheckTypes(raw_ostream &OS);
+
   /// Create the SVETypeFlags used in CGBuiltins
   void createTypeFlags(raw_ostream &o);
 
@@ -1572,7 +1575,9 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) {
   for (auto &KV : MergeTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
   OS << "#endif\n\n";
+}
 
+void SVEEmitter::createImmCheckTypes(raw_ostream &OS) {
   OS << "#ifdef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES\n";
   for (auto &KV : ImmCheckTypes)
     OS << "  " << KV.getKey() << " = " << KV.getValue() << ",\n";
@@ -1858,6 +1863,10 @@ void EmitSveTypeFlags(RecordKeeper &Records, raw_ostream &OS) {
   SVEEmitter(Records).createTypeFlags(OS);
 }
 
+void EmitImmCheckTypes(RecordKeeper &Records, raw_ostream &OS) {
+  SVEEmitter(Records).createImmCheckTypes(OS);
+}
+
 void EmitSveStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
   SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SVE);
 }
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 42cc704543f18e..dab447ff7d944f 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -75,6 +75,7 @@ enum ActionType {
   GenArmVectorType,
   GenArmNeonSema,
   GenArmNeonTest,
+  GenArmImmCheckTypes,
   GenArmMveHeader,
   GenArmMveBuiltinDef,
   GenArmMveBuiltinSema,
@@ -234,6 +235,9 @@ cl::opt<ActionType> Action(
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
                    "Generate ARM NEON tests for clang"),
+        clEnumValN(GenArmImmCheckTypes, "gen-arm-immcheck-types",
+                   "Generate arm_immchecktypes.h (immediate range check types)"
+                   " for clang"),
         clEnumValN(GenArmSveHeader, "gen-arm-sve-header",
                    "Generate arm_sve.h for clang"),
         clEnumValN(GenArmSveBuiltins, "gen-arm-sve-builtins",
@@ -469,6 +473,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenArmNeonTest:
     EmitNeonTest(Records, OS);
     break;
+  case GenArmImmCheckTypes:
+    EmitImmCheckTypes(Records, OS);
+    break;
   case GenArmMveHeader:
     EmitMveHeader(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 5f2dd257cb90a9..3a424c9c91fe71 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -108,6 +108,7 @@ void EmitNeonSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitVectorTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitNeonTest(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
+void EmitImmCheckTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitSveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
diff --git a/llvm/docs/CommandGuide/tblgen.rst b/llvm/docs/CommandGuide/tblgen.rst
index aa4c8e17864050..92186579e682df 100644
--- a/llvm/docs/CommandGuide/tblgen.rst
+++ b/llvm/docs/CommandGuide/tblgen.rst
@@ -276,6 +276,10 @@ clang-tblgen Options
 
   Generate ARM NEON tests for Clang.
 
+.. option:: -gen-arm-immcheck-types
+
+  Generate ``arm_immcheck_types.inc`` for Clang.
+
 .. option:: -gen-arm-sve-header
 
   Generate ``arm_sve.h`` for Clang.

>From f25a2393efdacfce0f2c576019c8d8a46a4556d2 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Fri, 9 Aug 2024 10:15:27 +0000
Subject: [PATCH 05/17] Refactor immediate range checking change

For each immediate, a NEON instruction must define
  - The index of the immediate argument to be checked

  - The type of immediate range check to be performed,
    (e.g., ImmCheckShiftRight)

  - The index of the argument whose type defines the context
    of this immediate check (base type, vector size).

	- Note that if this definition generates a polymorphic
	  NEON builtin, the base type defined by this argument is
	  overwritten by that of the type code supplied to the
	  overloaded builtin call. This third argument is omitted
	  in some cases due to this.

- Example
 - vfma_laneq
 (https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiessimdisa=[Neon]&q=vfma_laneq)
	- The immediate is supplied in argument 3.
	- The immediate is used as an index into the lanes of argument 2
	- So we must perform an immediate check on argument 3, based on
	  the number of lanes in argument 2.
	- ImmCheck<3, ImmCheckLaneIndex, 2>

- Changes
 - include/clang/Basic/arm_neon.td
	- Fix incorrect immediate range specifiers
	- Replace ImmCheckLaneQIndex occurrences with
	  ImmCheckLaneIndex (vector width is now inferred
	  from type context argument).
 - include/clang/Basic/arm_fp16.td
	- fix immediate range of fcvt instructions
 - include/clang/Basic/arm_immcheck_incl.td
	- Remove ImmCheckLaneQIndex
	- Add ImmCheckCvt
 - include/clang/Basic/TargetBuiltins.h
	- revert renaming of ImmCheckTypes

 - utils/TableGen/NeonEmitter.cpp
	- Share ImmCheck class with SVEEmitter
	- Move parsing of ImmCheck list to constructor so that
	  TypeContextArg can be understood.
 - utils/TableGen/SveEmitter.cpp
	- Reflect renaming of 'EltSizeArg' to 'TypeContextArg'.

 - Addition
  - llvm/include/llvm/TableGen/AArch64ImmCheck.h
	- move ImmCheck cpp class so that it can be shared
	  by NeonEmitter and SVEEmitter.
---
 clang/include/clang/Basic/TargetBuiltins.h    |   2 +-
 clang/include/clang/Basic/arm_fp16.td         |  15 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |   6 +-
 clang/include/clang/Basic/arm_neon.td         |  83 +++--
 clang/include/clang/Sema/SemaARM.h            |  19 +-
 clang/lib/Sema/SemaARM.cpp                    | 350 +++++++++---------
 clang/utils/TableGen/NeonEmitter.cpp          |  80 ++--
 clang/utils/TableGen/SveEmitter.cpp           |  21 +-
 llvm/include/llvm/TableGen/AArch64ImmCheck.h  |  37 ++
 9 files changed, 312 insertions(+), 301 deletions(-)
 create mode 100644 llvm/include/llvm/TableGen/AArch64ImmCheck.h

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 15757e43f4a5e5..a85e070cad0383 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -242,7 +242,7 @@ namespace clang {
   };
 
   // Shared between SVE/SME and NEON
-  enum ArmImmCheckType {
+  enum ImmCheckType {
 #define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
 #include "clang/Basic/arm_immcheck_types.inc"
 #undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index 42228a3ba1ffad..81d257fc73033e 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -81,12 +81,17 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
     def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
     def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
     def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">;
-    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">;
-    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">;
-    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">;
-    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">;
-    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">;
   }
+    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
+    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
   // Comparison
   def SCALAR_CMEQRH   : SInst<"vceq", "(1U)11", "Sh">;
   def SCALAR_CMEQZH   : SInst<"vceqz", "(1U)1", "Sh">;
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
index 3b20248f650400..88440532799839 100644
--- a/clang/include/clang/Basic/arm_immcheck_incl.td
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -11,7 +11,7 @@ def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
 def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
 def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
 def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneQIndex          : ImmCheckType<8>;  // (Neon) treat type as Quad
+def ImmCheckCvt                 : ImmCheckType<8>;  // 1..sizeinbits(elt) (same as ShiftRight)
 def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(128/(2*sizeinbits(elt)) - 1)
 def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(128/(4*sizeinbits(elt)) - 1)
 def ImmCheckComplexRot90_270    : ImmCheckType<11>; // [90,270]
@@ -31,9 +31,9 @@ def ImmCheck1_32                : ImmCheckType<24>; // 1..32
 def ImmCheck1_64                : ImmCheckType<25>; // 1..64
 def ImmCheck0_63                : ImmCheckType<26>; // 0..63
 
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
+class ImmCheck<int arg, ImmCheckType kind, int typeArg = -1> {
   int Arg = arg;
   // The index of the argument whose type should be referred to when validating this immedaite.
-  int EltSizeArg = eltSizeArg;
+  int TypeContextArg = typeArg;
   ImmCheckType Kind = kind;
 }
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index f92e405b1d8691..2752e540207114 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -288,14 +288,14 @@ def SPLAT  : WInst<"splat_lane", ".(!q)I",
                     [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
                    "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
-                   [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
+                   [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
   let isLaneQ = 1;
 }
 let TargetGuard = "bf16,neon" in {
   def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb", 
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", 
-                      [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
     let isLaneQ = 1;
   }
 }
@@ -421,6 +421,12 @@ def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl", 
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
+
+// Narrowing right shifts should have an immediate range of 1..(sizeinbits(arg)/2).
+// However, as the overloaded type code that is supplied to a polymorphic builtin
+// is that of the return type (half as wide as the argument in this case), using
+// ImmCheckShiftRightNarrow would return in an upper bound of (sizeinbits(arg)/2)/2.
+// ImmCheckShiftRight produces the correct behavior here.
 def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl", 
                       [ImmCheck<1, ImmCheckShiftRight>]>;
 def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil", 
@@ -433,6 +439,12 @@ def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
 def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl", 
                       [ImmCheck<1, ImmCheckShiftRight>]>;
+
+// Widening left-shifts should have a range of 0..(sizeinbits(arg)-1).
+// This polymorphic builtin is supplied the wider return type as it's overloaded
+// base type, so the range here is actually 0..(sizeinbits(arg)*2).
+// This cannot be rectified currently due to a use of vshll_n_s16 with an
+// out-of-bounds immediate in the defintiion of vcvt_f32_bf16.
 def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi", 
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 
@@ -479,14 +491,14 @@ def VLD1_X2_F16   : WInst<"vld1_x2", "2(c*!)", "hQh">;
 def VLD1_X3_F16   : WInst<"vld1_x3", "3(c*!)", "hQh">;
 def VLD1_X4_F16   : WInst<"vld1_x4", "4(c*!)", "hQh">;
 def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh", 
-                          [ImmCheck<2, ImmCheck0_3, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP_F16  : WInst<"vld1_dup", ".(c*!)", "hQh">;
 def VST1_F16      : WInst<"vst1", "v*(.!)", "hQh">;
 def VST1_X2_F16   : WInst<"vst1_x2", "v*(2!)", "hQh">;
 def VST1_X3_F16   : WInst<"vst1_x3", "v*(3!)", "hQh">;
 def VST1_X4_F16   : WInst<"vst1_x4", "v*(4!)", "hQh">;
 def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh", 
-                          [ImmCheck<2, ImmCheck0_3, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -523,20 +535,20 @@ def VLD2_DUP_F16  : WInst<"vld2_dup", "2(c*!)", "hQh">;
 def VLD3_DUP_F16  : WInst<"vld3_dup", "3(c*!)", "hQh">;
 def VLD4_DUP_F16  : WInst<"vld4_dup", "4(c*!)", "hQh">;
 def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh", 
-                          [ImmCheck<4, ImmCheck0_3, 1>]>;
+                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
 def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh", 
-                          [ImmCheck<5, ImmCheck0_3, 1>]>;
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh", 
-                          [ImmCheck<6, ImmCheck0_3, 1>]>;
+                          [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2_F16      : WInst<"vst2", "v*(2!)", "hQh">;
 def VST3_F16      : WInst<"vst3", "v*(3!)", "hQh">;
 def VST4_F16      : WInst<"vst4", "v*(4!)", "hQh">;
 def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh", 
-                          [ImmCheck<3, ImmCheck0_3, 1>]>;
+                          [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh", 
-                         [ImmCheck<4, ImmCheck0_3, 1>]>;
+                         [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
 def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh", 
-                          [ImmCheck<5, ImmCheck0_3, 1>]>;
+                          [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -655,9 +667,9 @@ def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
 def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi", 
-                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi", 
-                              [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
 let TargetGuard = "v8.1a,neon" in {
@@ -1072,9 +1084,9 @@ def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
 }
 
 def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd", 
-                        [ImmCheck<3, ImmCheckLaneIndex, 0>]>;
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd", 
-                        [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
@@ -1151,9 +1163,9 @@ def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
 
 let isLaneQ = 1 in {
 def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
-                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
-                          [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
+                          [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
 def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
@@ -1425,16 +1437,16 @@ def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl",
 let isScalarNarrowShift = 1 in {
   // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
   def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
   def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Shift Right Unsigned Narrow (Immediate)
   def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
   def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl", 
-                            [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+                            [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1660,11 +1672,11 @@ def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
 def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d", 
-                            [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+                            [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
 def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
-                              [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
   let isLaneQ = 1;
 }
 
@@ -1680,7 +1692,7 @@ def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ
 def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
-                            [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1700,15 +1712,15 @@ def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR
 def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
-                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
 def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
-                              [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+                              [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
-                              [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                              [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
   let isLaneQ = 1;
 }
 
@@ -1741,7 +1753,7 @@ def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
-                            [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
   let isLaneQ = 1;
 }
 
@@ -1901,7 +1913,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
-                          [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
     let isLaneQ = 1;
   }
 
@@ -1911,7 +1923,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
-                                [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
     let isLaneQ = 1;
   }
 
@@ -1948,7 +1960,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
-                                [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
     let isLaneQ = 1;
   }
 
@@ -1971,7 +1983,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh", 
                                 [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh", 
-                                [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
     let isLaneQ = 1;
   }
 }
@@ -2063,10 +2075,9 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
 
     let isLaneQ = 1 in  {
-      // vcmla{ROT}_laneq
-      // ACLE specifies that the fp16 vcmla_#ROT_laneq variant has an immedaite range of 0 <= lane <= 1.
-      // fp16 is the only variant for which these two differ.
-      // https://developer.arm.com/documentation/ihi0073/latest/ 
+      // ACLE specifies that the f16 variant of vcmla_#ROT_laneq has an immediate range 0<=lane<=1,
+      // whereas the f16 variant of vcmlaq_#ROT_laneq has an immediate range 0<=lane<=3.
+      // f16 is the only type for which these two differ.
       defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
       def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
                 (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>;
@@ -2126,7 +2137,7 @@ let TargetGuard = "bf16,neon" in {
   def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
-                          [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
     let isLaneQ = 1;
   }
 
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index 4fdeecf2ac6949..ebeb82e9455ba9 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -13,7 +13,10 @@
 #ifndef LLVM_CLANG_SEMA_SEMAARM_H
 #define LLVM_CLANG_SEMA_SEMAARM_H
 
-#include "clang/AST/ASTFwd.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/SemaBase.h"
 #include "llvm/ADT/StringRef.h"
 #include <tuple>
@@ -40,18 +43,20 @@ class SemaARM : public SemaBase {
                             /// flags. Do Sema checks for the runtime mode.
   };
 
+  bool CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy, unsigned ArgIdx,
+                         unsigned EltBitWidth, unsigned VecBitWidth);
   bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
-  bool ParseNeonImmChecks(CallExpr *TheCall,
-                          SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
-                          int OverloadType);
+  bool
+  ParseNeonImmChecks(CallExpr *TheCall,
+                     SmallVector<std::tuple<int, int, int, int>, 2> &ImmChecks,
+                     int OverloadType);
+  bool ParseSVEImmChecks(CallExpr *TheCall,
+                         SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
-  bool
-  ParseSVEImmChecks(CallExpr *TheCall,
-                    llvm::SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
   bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                    CallExpr *TheCall);
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index aa19e5f82aa15f..3dfeb1bd02adeb 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -12,7 +12,6 @@
 
 #include "clang/Sema/SemaARM.h"
 #include "clang/Basic/DiagnosticSema.h"
-#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
@@ -370,202 +369,183 @@ enum ArmSMEState : unsigned {
   ArmZT0Mask = 0b11 << 2
 };
 
+bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
+                                unsigned ArgIdx, unsigned EltBitWidth,
+                                unsigned VecBitWidth) {
+
+  typedef bool (*OptionSetCheckFnTy)(int64_t Value);
+
+  // Function that checks whether the operand (ArgIdx) is an immediate
+  // that is one of the predefined values.
+  auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+                                 int ErrDiag) -> bool {
+    // We can't check the value of a dependent argument.
+    Expr *Arg = TheCall->getArg(ArgIdx);
+    if (Arg->isTypeDependent() || Arg->isValueDependent())
+      return false;
+
+    // Check constant-ness first.
+    llvm::APSInt Imm;
+    if (SemaRef.BuiltinConstantArg(TheCall, ArgIdx, Imm))
+      return true;
+
+    if (!CheckImm(Imm.getSExtValue()))
+      return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+    return false;
+  };
+
+  switch ((ImmCheckType)CheckTy) {
+  case ImmCheckType::ImmCheck0_31:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 31))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_13:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 13))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_63:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_16:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_7:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_1:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_3:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 3))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_7:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 7))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckExtract:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (2048 / EltBitWidth) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckCvt:
+  case ImmCheckType::ImmCheckShiftRight:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, EltBitWidth))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckShiftRightNarrow:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, EltBitWidth / 2))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckShiftLeft:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, EltBitWidth - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndex:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (1 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndexCompRotate:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (2 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckLaneIndexDot:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+                                        (VecBitWidth / (4 * EltBitWidth)) - 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckComplexRot90_270:
+    if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
+                            diag::err_rotation_argument_to_cadd))
+      return true;
+    break;
+  case ImmCheckType::ImmCheckComplexRotAll90:
+    if (CheckImmediateInSet(
+            [](int64_t V) { return V == 0 || V == 90 || V == 180 || V == 270; },
+            diag::err_rotation_argument_to_cmla))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_1:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 1))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_2:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 2))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_3:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_0:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 0))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_15:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 15))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck0_255:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 255))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_32:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck1_64:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64))
+      return true;
+    break;
+  case ImmCheckType::ImmCheck2_4_Mul2:
+    if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 2, 4) ||
+        SemaRef.BuiltinConstantArgMultiple(TheCall, ArgIdx, 2))
+      return true;
+    break;
+  default:
+    llvm_unreachable("Invalid immediate range typeflag!");
+    break;
+  }
+  return false;
+}
+
 bool SemaARM::ParseNeonImmChecks(
-    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+    CallExpr *TheCall,
+    SmallVector<std::tuple<int, int, int, int>, 2> &ImmChecks,
     int OverloadType = -1) {
-  int ArgIdx, CheckTy, ElementType;
-  bool hasError = false;
+  unsigned CheckTy;
+  unsigned ArgIdx, ElementSizeInBits, VecSizeInBits;
+  bool HasError = false;
 
-  for (auto &I : ImmChecks) {
-    std::tie(ArgIdx, CheckTy, ElementType) = I;
+  for (const auto &I : ImmChecks) {
+    std::tie(ArgIdx, CheckTy, ElementSizeInBits, VecSizeInBits) = I;
 
-    NeonTypeFlags Type = (OverloadType != -1) ? NeonTypeFlags(OverloadType)
-                                              : NeonTypeFlags(ElementType);
+    if (OverloadType >= 0)
+      ElementSizeInBits = NeonTypeFlags(OverloadType).getEltSizeInBits();
 
-    switch ((ArmImmCheckType)CheckTy) {
-    case ArmImmCheckType::ImmCheck0_3:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
-      break;
-    case ArmImmCheckType::ImmCheck0_63:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
-      break;
-    case ArmImmCheckType::ImmCheck0_7:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
-      break;
-    case ArmImmCheckType::ImmCheck1_16:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
-      break;
-    case ArmImmCheckType::ImmCheck1_32:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
-      break;
-    case ArmImmCheckType::ImmCheck1_64:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndex:
-      hasError |= SemaRef.BuiltinConstantArgRange(
-          TheCall, ArgIdx, 0,
-          (64 << Type.isQuad()) / Type.getEltSizeInBits() - 1);
-      break;
-    case ArmImmCheckType::ImmCheckLaneQIndex: // force to use quad
-      hasError |= SemaRef.BuiltinConstantArgRange(
-          TheCall, ArgIdx, 0, (128 / Type.getEltSizeInBits()) - 1);
-      break;
-    case ArmImmCheckType::ImmCheckShiftLeft:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
-                                                  Type.getEltSizeInBits() - 1);
-      break;
-    case ArmImmCheckType::ImmCheckShiftRight:
-      hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1,
-                                                  Type.getEltSizeInBits());
-      break;
-    default:
-      llvm_unreachable("Invalid Neon immediate range typeflag!");
-      break;
-    }
+    HasError |= CheckImmediateArg(TheCall, CheckTy, ArgIdx, ElementSizeInBits,
+                                  VecSizeInBits);
   }
 
-  return hasError;
+  return HasError;
 }
 
 bool SemaARM::ParseSVEImmChecks(
     CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
-  // Perform all the immediate checks for this builtin call.
-  bool HasError = false;
-  for (auto &I : ImmChecks) {
-    int ArgNum, CheckTy, ElementSizeInBits;
-    std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
-
-    typedef bool (*OptionSetCheckFnTy)(int64_t Value);
-
-    // Function that checks whether the operand (ArgNum) is an immediate
-    // that is one of the predefined values.
-    auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
-                                   int ErrDiag) -> bool {
-      // We can't check the value of a dependent argument.
-      Expr *Arg = TheCall->getArg(ArgNum);
-      if (Arg->isTypeDependent() || Arg->isValueDependent())
-        return false;
-
-      // Check constant-ness first.
-      llvm::APSInt Imm;
-      if (SemaRef.BuiltinConstantArg(TheCall, ArgNum, Imm))
-        return true;
 
-      if (!CheckImm(Imm.getSExtValue()))
-        return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
-      return false;
-    };
+  bool HasError = false;
+  unsigned CheckTy, ArgIdx, ElementSizeInBits;
 
-    switch ((ArmImmCheckType)CheckTy) {
-    case ArmImmCheckType::ImmCheck0_31:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_13:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_16:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_7:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_1:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_3:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck1_7:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckExtract:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (2048 / ElementSizeInBits) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckShiftRight:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
-                                          ElementSizeInBits))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckShiftRightNarrow:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
-                                          ElementSizeInBits / 2))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckShiftLeft:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          ElementSizeInBits - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndex:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (1 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndexCompRotate:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (2 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckLaneIndexDot:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
-                                          (128 / (4 * ElementSizeInBits)) - 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckComplexRot90_270:
-      if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
-                              diag::err_rotation_argument_to_cadd))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheckComplexRotAll90:
-      if (CheckImmediateInSet(
-              [](int64_t V) {
-                return V == 0 || V == 90 || V == 180 || V == 270;
-              },
-              diag::err_rotation_argument_to_cmla))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_1:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_2:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_3:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_0:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_15:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck0_255:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
-        HasError = true;
-      break;
-    case ArmImmCheckType::ImmCheck2_4_Mul2:
-      if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
-          SemaRef.BuiltinConstantArgMultiple(TheCall, ArgNum, 2))
-        HasError = true;
-      break;
-    default:
-      llvm_unreachable("Invalid SVE immediate range typeflag!");
-      break;
-    }
+  for (const auto &I : ImmChecks) {
+    std::tie(ArgIdx, CheckTy, ElementSizeInBits) = I;
+    HasError |=
+        CheckImmediateArg(TheCall, CheckTy, ArgIdx, ElementSizeInBits, 128);
   }
 
   return HasError;
@@ -828,7 +808,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 
   // For NEON intrinsics which take an immediate value as part of the
   // instruction, range check them here.
-  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
+  SmallVector<std::tuple<int, int, int, int>, 2> ImmChecks;
   switch (BuiltinID) {
   default:
     return false;
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index bd7e94937d350c..2330448d9ce5ed 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/AArch64ImmCheck.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/SetTheory.h"
@@ -334,7 +335,7 @@ class Intrinsic {
   /// The types of return value [0] and parameters [1..].
   std::vector<Type> Types;
 
-  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
+  SmallVector<ImmCheck, 2> ImmChecks;
   /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
   int PolymorphicKeyType;
   /// The local variables defined.
@@ -370,16 +371,14 @@ class Intrinsic {
 
 public:
   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
-            TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks,
-            ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
             StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable,
             bool BigEndianSafe)
-      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks),
-        CK(CK), Body(Body), ArchGuard(ArchGuard.str()),
-        TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
-        BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
-        UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
-        Emitter(Emitter) {
+      : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
+        ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
+        IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
+        PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
+        BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
     // Modify the TypeSpec per-argument to get a concrete Type, and create
     // known variables for each.
     // Types[0] is the return value.
@@ -408,6 +407,26 @@ class Intrinsic {
           (Type.isScalar() && Type.isHalf()))
         UseMacro = true;
     }
+
+    int ArgIdx, Kind, TypeArgIdx;
+    std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+    for (const auto *I : ImmCheckList) {
+      unsigned EltSizeInBits = 0, VecSizeInBits = 0;
+
+      ArgIdx = I->getValueAsInt("Arg");
+      TypeArgIdx = I->getValueAsInt("TypeContextArg");
+      Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
+
+      assert((ArgIdx >= 0 && Kind >= 0) && "Arg and Kind must be nonnegative");
+
+      if (TypeArgIdx >= 0) {
+        EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
+        VecSizeInBits = getParamType(TypeArgIdx).getSizeInBits();
+      }
+
+      ImmChecks.emplace_back(
+          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits));
+    }
   }
 
   /// Get the Record that this intrinsic is based off.
@@ -419,14 +438,13 @@ class Intrinsic {
   /// Get the architectural guard string (#ifdef).
   std::string getArchGuard() const { return ArchGuard; }
   std::string getTargetGuard() const { return TargetGuard; }
-  ArrayRef<std::tuple<int, int, int>> getImmChecks() const { return ImmChecks; }
+  ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
   /// Get the non-mangled name.
   std::string getName() const { return Name; }
 
   /// Return true if the intrinsic takes an immediate operand.
   bool hasImmediate() const {
     return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
-    // return !ImmChecks.empty();
   }
 
   // Return if the supplied argument is an immediate
@@ -558,7 +576,6 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
-  void genNeonImmCheckTypes(raw_ostream &OS);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
 
@@ -1958,16 +1975,6 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
-  std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
-
-  SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
-  for (const auto *R : ImmCheckList) {
-
-    ImmChecks.push_back(
-        std::make_tuple(R->getValueAsInt("Arg"),
-                        R->getValueAsDef("Kind")->getValueAsInt("Value"),
-                        R->getValueAsInt("EltSizeArg")));
-  }
 
   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
@@ -2009,9 +2016,8 @@ void NeonEmitter::createIntrinsic(Record *R,
   auto &Entry = IntrinsicMap[Name];
 
   for (auto &I : NewTypeSpecs) {
-    Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body,
-                       *this, ArchGuard, TargetGuard, IsUnavailable,
-                       BigEndianSafe);
+    Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
+                       ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
     Out.push_back(&Entry.back());
   }
 
@@ -2160,21 +2166,9 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
-void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
-  OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
-
-  for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
-    OS << "  " << RV->getNameInitAsString() << " = "
-       << RV->getValueAsInt("Value") << ",\n";
-  }
-
-  OS << "#endif\n\n";
-}
-
 void NeonEmitter::genIntrinsicRangeCheckCode(
     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
-  int EltType;
   // Ensure these are only emitted once.
   std::set<std::string> Emitted;
 
@@ -2188,15 +2182,11 @@ void NeonEmitter::genIntrinsicRangeCheckCode(
       continue;
 
     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
-
     for (const auto &Check : Def->getImmChecks()) {
-      EltType = std::get<2>(Check); // elt type argument
-      if (EltType >= 0)
-        EltType = Def->getParamType(EltType).getNeonEnum();
-
-      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check)
-         << ", " << std::get<1>(Check) << ", " << EltType << ")); \n";
-      OS << "  break;\n";
+      OS << " ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
+         << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
+         << Check.getVecSizeInBits() << "));\n"
+         << " break;\n";
     }
     Emitted.insert(Def->getMangledName());
   }
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index ccf687959ab6fb..a47e16a28fcd68 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/TableGen/AArch64ImmCheck.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include <array>
@@ -49,23 +50,6 @@ enum class ACLEKind { SVE, SME };
 using TypeSpec = std::string;
 
 namespace {
-
-class ImmCheck {
-  unsigned Arg;
-  unsigned Kind;
-  unsigned ElementSizeInBits;
-
-public:
-  ImmCheck(unsigned Arg, unsigned Kind, unsigned ElementSizeInBits = 0)
-      : Arg(Arg), Kind(Kind), ElementSizeInBits(ElementSizeInBits) {}
-  ImmCheck(const ImmCheck &Other) = default;
-  ~ImmCheck() = default;
-
-  unsigned getArg() const { return Arg; }
-  unsigned getKind() const { return Kind; }
-  unsigned getElementSizeInBits() const { return ElementSizeInBits; }
-};
-
 class SVEType {
   bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat;
   bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp,
@@ -433,7 +417,6 @@ const std::array<SVEEmitter::ReinterpretTypeInfo, 12> SVEEmitter::Reinterprets =
 
 } // end anonymous namespace
 
-
 //===----------------------------------------------------------------------===//
 // Type implementation
 //===----------------------------------------------------------------------===//
@@ -1214,7 +1197,7 @@ void SVEEmitter::createIntrinsic(
     SmallVector<ImmCheck, 2> ImmChecks;
     for (auto *R : ImmCheckList) {
       int64_t Arg = R->getValueAsInt("Arg");
-      int64_t EltSizeArg = R->getValueAsInt("EltSizeArg");
+      int64_t EltSizeArg = R->getValueAsInt("TypeContextArg");
       int64_t Kind = R->getValueAsDef("Kind")->getValueAsInt("Value");
       assert(Arg >= 0 && Kind >= 0 && "Arg and Kind must be nonnegative");
 
diff --git a/llvm/include/llvm/TableGen/AArch64ImmCheck.h b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
new file mode 100644
index 00000000000000..ff8c4a1865ea34
--- /dev/null
+++ b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
@@ -0,0 +1,37 @@
+//===----- AArch64ImmCheck.h -- ARM immediate range check -----*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the ImmCheck class which supports the range-checking of
+/// immediate values supplied to AArch64 SVE/SME and NEON intrinsics.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64_IMMCHECK_H
+#define AARCH64_IMMCHECK_H
+
+class ImmCheck {
+  unsigned Arg;
+  unsigned Kind;
+  unsigned ElementSizeInBits;
+  unsigned VecSizeInBits;
+
+public:
+  ImmCheck(unsigned Arg, unsigned Kind, unsigned ElementSizeInBits = 0,
+           unsigned VecSizeInBits = 128)
+      : Arg(Arg), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
+        VecSizeInBits(VecSizeInBits) {}
+  ImmCheck(const ImmCheck &Other) = default;
+  ~ImmCheck() = default;
+
+  unsigned getArg() const { return Arg; }
+  unsigned getKind() const { return Kind; }
+  unsigned getElementSizeInBits() const { return ElementSizeInBits; }
+  unsigned getVecSizeInBits() const { return VecSizeInBits; }
+};
+
+#endif

>From 0df668f70a8e23e8d7bbc4d9f64885c50f33f22c Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Fri, 9 Aug 2024 10:40:30 +0000
Subject: [PATCH 06/17] Add NEON intrinsic immediate range tests

- Create ../clang/test/Sema/aarch64-neon-immediate-ranges/
	- including immediate range checking tests for all intrinsics
	  that take an immediate argument and aren't tested elsewhere.
	- A note has been made where tests have been excluded to avoid
	  duplication, stating where the existing tests can be found.
	- A note has been made about the exclusion of widening left-shifts
	  from these tests.
	- These tests were created with the help of this tool (written by
	  the author of this commit), https://github.com/SpencerAbson/imm-test-gen
		- Although they are generated on a per-file basis, care has been
		  taken to ensure that the tests are correct and no test
		  duplication is introduced.
---
 .../aarch64-neon-immediate-ranges/bfloat16.c  |  204 ++++
 .../conversions.c                             |  144 +++
 .../copy-vector-lane.c                        |  498 ++++++++
 .../aarch64-neon-immediate-ranges/dotprod.c   |   29 +
 .../extract-elt-from-vector.c                 |  301 +++++
 .../extract-vector-from-vectors.c             |  134 ++
 .../fp16-scalar.c                             |   24 +
 .../aarch64-neon-immediate-ranges/fp16-v84.c  |   89 ++
 .../fp16-vector.c                             |  181 +++
 .../fused-multiply-accumulate.c               |  115 ++
 .../matrix-multiplication.c                   |   50 +
 .../multiply-extended.c                       |   69 ++
 .../saturating-multiply-accumulate.c          |   91 ++
 .../saturating-multiply-by-scalar-and-widen.c |  136 +++
 .../set-lanes-to-value.c                      |  277 +++++
 .../set-vector-lane.c                         |  162 +++
 .../sqrdmlah-ranges.c                         |   94 ++
 .../vector-load.c                             |  525 ++++++++
 .../vector-multiply-accumulate-by-scalar.c    |  161 +++
 .../vector-multiply-by-scalar-and-widen.c     |   78 ++
 .../vector-multiply-by-scalar.c               |  158 +++
 .../vector-multiply-subtract-by-scalar.c      |  161 +++
 .../vector-shift-left.c                       |  542 +++++++++
 .../vector-shift-right.c                      | 1083 +++++++++++++++++
 .../vector-store.c                            |  448 +++++++
 25 files changed, 5754 insertions(+)
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c

diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c b/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
new file mode 100644
index 00000000000000..bde8efc260f172
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
@@ -0,0 +1,204 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +bf16 -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_bf16.h>
+// REQUIRES: aarch64-registered-target
+
+// clang/test/Sema/aarch64-neon-bf16-ranges.c includes tests for:
+// vcopy_lane_bf16, vcopyq_lane_bf16, vcopy_laneq_bf16, vcopyq_laneq_bf16
+
+void test_set_all_lanes_to_the_same_value_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vdup_lane_bf16(arg_b16x4, 0);
+	vdup_lane_bf16(arg_b16x4, 3);
+	vdup_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_bf16(arg_b16x4, 0);
+	vdupq_lane_bf16(arg_b16x4, 3);
+	vdupq_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_bf16(arg_b16x8, 0);
+	vdup_laneq_bf16(arg_b16x8, 7);
+	vdup_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_bf16(arg_b16x8, 0);
+	vdupq_laneq_bf16(arg_b16x8, 7);
+	vdupq_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_lane_bf16(arg_b16x4, 0);
+	vduph_lane_bf16(arg_b16x4, 3);
+	vduph_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_bf16(arg_b16x8, 0);
+	vduph_laneq_bf16(arg_b16x8, 7);
+	vduph_laneq_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_split_vectors_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vget_lane_bf16(arg_b16x4, 0);
+	vget_lane_bf16(arg_b16x4, 3);
+	vget_lane_bf16(arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_bf16(arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_bf16(arg_b16x8, 0);
+	vgetq_lane_bf16(arg_b16x8, 7);
+	vgetq_lane_bf16(arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_bf16(arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, bfloat16_t arg_b16) {
+	vset_lane_bf16(arg_b16, arg_b16x4, 0);
+	vset_lane_bf16(arg_b16, arg_b16x4, 3);
+	vset_lane_bf16(arg_b16, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_bf16(arg_b16, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 0);
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 7);
+	vsetq_lane_bf16(arg_b16, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_bf16(arg_b16, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_load_bf16(bfloat16x4_t arg_b16x4, bfloat16x4x4_t arg_b16x4x4, bfloat16x8x4_t arg_b16x8x4,
+					bfloat16x4x3_t arg_b16x4x3, bfloat16x8_t arg_b16x8, bfloat16_t* arg_b16_ptr,
+					bfloat16x8x3_t arg_b16x8x3, bfloat16x4x2_t arg_b16x4x2, bfloat16x8x2_t arg_b16x8x2) {
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 0);
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 3);
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_bf16(arg_b16_ptr, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 0);
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 7);
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_bf16(arg_b16_ptr, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 0);
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 3);
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 0);
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 7);
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 0);
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 3);
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 0);
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 7);
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 0);
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 3);
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 0);
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 7);
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_bf16(bfloat16x4_t arg_b16x4, bfloat16x4x4_t arg_b16x4x4, bfloat16x8x4_t arg_b16x8x4,
+					 bfloat16x4x3_t arg_b16x4x3, bfloat16x8_t arg_b16x8, bfloat16_t* arg_b16_ptr,
+					 bfloat16x8x3_t arg_b16x8x3, bfloat16x4x2_t arg_b16x4x2, bfloat16x8x2_t arg_b16x8x2) { // Lane-immediate range checks for vst1..vst4 (plain and q) _lane_bf16.
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 0); // accepted: lower bound
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 3); // accepted: upper bound for a bfloat16x4_t operand
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_bf16(arg_b16_ptr, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 0);
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 7); // accepted: upper bound for a bfloat16x8_t operand
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_bf16(arg_b16_ptr, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 0);
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 3); // tuple operands keep the bound of the plain vector: 0..3 here, 0..7 for the q forms
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_bf16(arg_b16_ptr, arg_b16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 0);
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 7);
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_bf16(arg_b16_ptr, arg_b16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 0);
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 3);
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_bf16(arg_b16_ptr, arg_b16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 0);
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 7);
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_bf16(arg_b16_ptr, arg_b16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 0);
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 3);
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_bf16(arg_b16_ptr, arg_b16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 0);
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 7);
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_bf16(arg_b16_ptr, arg_b16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_dot_product_f32(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Lane-immediate range checks for the vbfdot lane/laneq forms.
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 0); // accepted: lower bound
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 1); // accepted: upper bound is 1 for a bfloat16x4_t operand (presumably the index selects a bf16 pair - TODO confirm)
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdot_lane_f32(arg_f32x2, arg_b16x4, arg_b16x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0);
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 3); // accepted: upper bound is 3 for a bfloat16x8_t operand
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdotq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 0);
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 3); // the bound follows the last vector argument, not the result type
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdot_laneq_f32(arg_f32x2, arg_b16x4, arg_b16x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0);
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 1); // q result with a bfloat16x4_t last argument: the bound stays at 1
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vbfdotq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_f32(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, float32x4_t arg_f32x4) { // Lane-immediate checks for vbfmlalbq/vbfmlaltq; the bare '+' count in the directives below asks -verify for one or more matches (presumably several checks fire per call - TODO confirm).
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0); // accepted: lower bound
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 3); // accepted: upper bound for a bfloat16x4_t last argument
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlalbq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0);
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 7); // accepted: upper bound for a bfloat16x8_t last argument
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlalbq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 0);
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 3); // the t variants use the same bounds as the b variants above
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlaltq_lane_f32(arg_f32x4, arg_b16x8, arg_b16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 0);
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 7);
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vbfmlaltq_laneq_f32(arg_f32x4, arg_b16x8, arg_b16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c b/clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
new file mode 100644
index 00000000000000..30ae7f73924221
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/conversions.c
@@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_conversions_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Immediate range checks for the f32 -> s32/u32 vcvt*_n_* conversions: 1..32 is accepted.
+	vcvt_n_s32_f32(arg_f32x2, 1); // accepted: lower bound (0 is diagnosed below)
+	vcvt_n_s32_f32(arg_f32x2, 32); // accepted: upper bound
+	vcvt_n_s32_f32(arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s32_f32(arg_f32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s32_f32(arg_f32x4, 1);
+	vcvtq_n_s32_f32(arg_f32x4, 32); // q form: same range as the 64-bit vector form
+	vcvtq_n_s32_f32(arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s32_f32(arg_f32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u32_f32(arg_f32x2, 1);
+	vcvt_n_u32_f32(arg_f32x2, 32);
+	vcvt_n_u32_f32(arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u32_f32(arg_f32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u32_f32(arg_f32x4, 1);
+	vcvtq_n_u32_f32(arg_f32x4, 32);
+	vcvtq_n_u32_f32(arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u32_f32(arg_f32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_s32_f32(arg_f32, 1); // scalar form: same range as the vector forms
+	vcvts_n_s32_f32(arg_f32, 32);
+	vcvts_n_s32_f32(arg_f32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_s32_f32(arg_f32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_u32_f32(arg_f32, 1);
+	vcvts_n_u32_f32(arg_f32, 32);
+	vcvts_n_u32_f32(arg_f32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_u32_f32(arg_f32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_f64(float64x1_t arg_f64x1, float64x2_t arg_f64x2, float64_t arg_f64) { // Immediate range checks for the f64 -> s64/u64 vcvt*_n_* conversions: 1..64 is accepted.
+	vcvt_n_s64_f64(arg_f64x1, 1); // accepted: lower bound (0 is diagnosed below)
+	vcvt_n_s64_f64(arg_f64x1, 64); // accepted: upper bound
+	vcvt_n_s64_f64(arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s64_f64(arg_f64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s64_f64(arg_f64x2, 1);
+	vcvtq_n_s64_f64(arg_f64x2, 64); // q form: same range as the single-element vector form
+	vcvtq_n_s64_f64(arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s64_f64(arg_f64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u64_f64(arg_f64x1, 1);
+	vcvt_n_u64_f64(arg_f64x1, 64);
+	vcvt_n_u64_f64(arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u64_f64(arg_f64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u64_f64(arg_f64x2, 1);
+	vcvtq_n_u64_f64(arg_f64x2, 64);
+	vcvtq_n_u64_f64(arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u64_f64(arg_f64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_s64_f64(arg_f64, 1); // scalar form: same range as the vector forms
+	vcvtd_n_s64_f64(arg_f64, 64);
+	vcvtd_n_s64_f64(arg_f64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_s64_f64(arg_f64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_u64_f64(arg_f64, 1);
+	vcvtd_n_u64_f64(arg_f64, 64);
+	vcvtd_n_u64_f64(arg_f64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_u64_f64(arg_f64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s32(int32_t arg_i32, int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Immediate range checks for the s32 -> f32 vcvt*_n_* conversions: 1..32 is accepted.
+	vcvt_n_f32_s32(arg_i32x2, 1); // accepted: lower bound (0 is diagnosed below)
+	vcvt_n_f32_s32(arg_i32x2, 32); // accepted: upper bound
+	vcvt_n_f32_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f32_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f32_s32(arg_i32x4, 1);
+	vcvtq_n_f32_s32(arg_i32x4, 32);
+	vcvtq_n_f32_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f32_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_f32_s32(arg_i32, 1); // scalar form: same range as the vector forms
+	vcvts_n_f32_s32(arg_i32, 32);
+	vcvts_n_f32_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_f32_s32(arg_i32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u32(uint32x4_t arg_u32x4, uint32x2_t arg_u32x2, uint32_t arg_u32) { // Immediate range checks for the u32 -> f32 vcvt*_n_* conversions: 1..32 is accepted.
+	vcvt_n_f32_u32(arg_u32x2, 1); // accepted: lower bound (0 is diagnosed below)
+	vcvt_n_f32_u32(arg_u32x2, 32); // accepted: upper bound
+	vcvt_n_f32_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f32_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f32_u32(arg_u32x4, 1);
+	vcvtq_n_f32_u32(arg_u32x4, 32);
+	vcvtq_n_f32_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f32_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvts_n_f32_u32(arg_u32, 1); // scalar form: same range as the vector forms
+	vcvts_n_f32_u32(arg_u32, 32);
+	vcvts_n_f32_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvts_n_f32_u32(arg_u32, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1, int64_t arg_i64) { // Immediate range checks for the s64 -> f64 vcvt*_n_* conversions: 1..64 is accepted.
+	vcvt_n_f64_s64(arg_i64x1, 1); // accepted: lower bound (0 is diagnosed below)
+	vcvt_n_f64_s64(arg_i64x1, 64); // accepted: upper bound
+	vcvt_n_f64_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f64_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f64_s64(arg_i64x2, 1);
+	vcvtq_n_f64_s64(arg_i64x2, 64);
+	vcvtq_n_f64_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f64_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_f64_s64(arg_i64, 1); // scalar form: same range as the vector forms
+	vcvtd_n_f64_s64(arg_i64, 64);
+	vcvtd_n_f64_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_f64_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Immediate range checks for the u64 -> f64 vcvt*_n_* conversions: 1..64 is accepted.
+	vcvt_n_f64_u64(arg_u64x1, 1); // accepted: lower bound (0 is diagnosed below)
+	vcvt_n_f64_u64(arg_u64x1, 64); // accepted: upper bound
+	vcvt_n_f64_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f64_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f64_u64(arg_u64x2, 1);
+	vcvtq_n_f64_u64(arg_u64x2, 64);
+	vcvtq_n_f64_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f64_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtd_n_f64_u64(arg_u64, 1); // scalar form: same range as the vector forms
+	vcvtd_n_f64_u64(arg_u64, 64);
+	vcvtd_n_f64_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtd_n_f64_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c b/clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
new file mode 100644
index 00000000000000..aafd36d1ccfe62
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/copy-vector-lane.c
@@ -0,0 +1,498 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_copy_vector_lane_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8) { // vcopy lane/laneq on s8: both immediates are range checked, each against the vector argument that precedes it.
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 0); // accepted: both immediates at the lower bound
+
+	vcopy_lane_s8(arg_i8x8, 7, arg_i8x8, 0); // first immediate: upper bound for int8x8_t
+	vcopy_lane_s8(arg_i8x8, -1, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s8(arg_i8x8, 8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 7); // second immediate: upper bound for int8x8_t
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s8(arg_i8x8, 0, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 0);
+	vcopyq_lane_s8(arg_i8x16, 15, arg_i8x8, 0); // first immediate: upper bound for int8x16_t
+	vcopyq_lane_s8(arg_i8x16, -1, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s8(arg_i8x16, 16, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 7); // second immediate still bounded by int8x8_t
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s8(arg_i8x16, 0, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 0);
+	vcopy_laneq_s8(arg_i8x8, 7, arg_i8x16, 0);
+	vcopy_laneq_s8(arg_i8x8, -1, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s8(arg_i8x8, 8, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 15); // second immediate: upper bound for int8x16_t
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s8(arg_i8x8, 0, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 0);
+	vcopyq_laneq_s8(arg_i8x16, 15, arg_i8x16, 0);
+	vcopyq_laneq_s8(arg_i8x16, -1, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s8(arg_i8x16, 16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 15);
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s8(arg_i8x16, 0, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vcopy lane/laneq on s16: both immediates are range checked, each against the vector argument that precedes it.
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 0); // accepted: both immediates at the lower bound
+	vcopy_lane_s16(arg_i16x4, 3, arg_i16x4, 0); // first immediate: upper bound for int16x4_t
+	vcopy_lane_s16(arg_i16x4, -1, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s16(arg_i16x4, 4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 3); // second immediate: upper bound for int16x4_t
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s16(arg_i16x4, 0, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 0);
+	vcopyq_lane_s16(arg_i16x8, 7, arg_i16x4, 0); // first immediate: upper bound for int16x8_t
+	vcopyq_lane_s16(arg_i16x8, -1, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s16(arg_i16x8, 8, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 3); // second immediate still bounded by int16x4_t
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s16(arg_i16x8, 0, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 0);
+	vcopy_laneq_s16(arg_i16x4, 3, arg_i16x8, 0);
+	vcopy_laneq_s16(arg_i16x4, -1, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s16(arg_i16x4, 4, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 7); // second immediate: upper bound for int16x8_t
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s16(arg_i16x4, 0, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 0);
+	vcopyq_laneq_s16(arg_i16x8, 7, arg_i16x8, 0);
+	vcopyq_laneq_s16(arg_i16x8, -1, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s16(arg_i16x8, 8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 7);
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s16(arg_i16x8, 0, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vcopy lane/laneq on s32: both immediates are range checked, each against the vector argument that precedes it.
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 0); // accepted: both immediates at the lower bound
+	vcopy_lane_s32(arg_i32x2, 1, arg_i32x2, 0); // first immediate: upper bound for int32x2_t
+	vcopy_lane_s32(arg_i32x2, -1, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s32(arg_i32x2, 2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 1); // second immediate: upper bound for int32x2_t
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s32(arg_i32x2, 0, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 0);
+	vcopyq_lane_s32(arg_i32x4, 3, arg_i32x2, 0); // first immediate: upper bound for int32x4_t
+	vcopyq_lane_s32(arg_i32x4, -1, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s32(arg_i32x4, 4, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 1); // second immediate still bounded by int32x2_t
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s32(arg_i32x4, 0, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 0);
+	vcopy_laneq_s32(arg_i32x2, 1, arg_i32x4, 0);
+	vcopy_laneq_s32(arg_i32x2, -1, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s32(arg_i32x2, 2, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 3); // second immediate: upper bound for int32x4_t
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s32(arg_i32x2, 0, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 0);
+	vcopyq_laneq_s32(arg_i32x4, 3, arg_i32x4, 0);
+	vcopyq_laneq_s32(arg_i32x4, -1, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s32(arg_i32x4, 4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 3);
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s32(arg_i32x4, 0, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_s64(int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // vcopy lane/laneq on s64: both immediates are range checked, each against the vector argument that precedes it.
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 0); // accepted: 0 is the only in-range value for an int64x1_t operand
+	vcopy_lane_s64(arg_i64x1, -1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 0); // same call as the first line; it opens the checks on the second immediate
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_s64(arg_i64x1, 0, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, 0); // also the only in-range case for the second immediate of this intrinsic
+	vcopyq_lane_s64(arg_i64x2, 1, arg_i64x1, 0); // first immediate: upper bound for int64x2_t
+	vcopyq_lane_s64(arg_i64x2, -1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s64(arg_i64x2, 2, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_s64(arg_i64x2, 0, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 0);
+	vcopy_laneq_s64(arg_i64x1, -1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s64(arg_i64x1, 1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 1); // second immediate: upper bound for int64x2_t
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_s64(arg_i64x1, 0, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 0);
+	vcopyq_laneq_s64(arg_i64x2, 1, arg_i64x2, 0);
+	vcopyq_laneq_s64(arg_i64x2, -1, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s64(arg_i64x2, 2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 1);
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_s64(arg_i64x2, 0, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // vcopy lane/laneq on u8: both immediates are range checked, each against the vector argument that precedes it.
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 0); // accepted: both immediates at the lower bound
+	vcopy_lane_u8(arg_u8x8, 7, arg_u8x8, 0); // first immediate: upper bound for uint8x8_t
+	vcopy_lane_u8(arg_u8x8, -1, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u8(arg_u8x8, 8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 7); // second immediate: upper bound for uint8x8_t
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u8(arg_u8x8, 0, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 0);
+	vcopyq_lane_u8(arg_u8x16, 15, arg_u8x8, 0); // first immediate: upper bound for uint8x16_t
+	vcopyq_lane_u8(arg_u8x16, -1, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u8(arg_u8x16, 16, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 7); // second immediate still bounded by uint8x8_t
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u8(arg_u8x16, 0, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 0);
+	vcopy_laneq_u8(arg_u8x8, 7, arg_u8x16, 0);
+	vcopy_laneq_u8(arg_u8x8, -1, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u8(arg_u8x8, 8, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 15); // second immediate: upper bound for uint8x16_t
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u8(arg_u8x8, 0, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 0);
+	vcopyq_laneq_u8(arg_u8x16, 15, arg_u8x16, 0);
+	vcopyq_laneq_u8(arg_u8x16, -1, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u8(arg_u8x16, 16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 15);
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u8(arg_u8x16, 0, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // vcopy lane/laneq on u16: both immediates are range checked, each against the vector argument that precedes it.
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 0); // accepted: both immediates at the lower bound
+	vcopy_lane_u16(arg_u16x4, 3, arg_u16x4, 0); // first immediate: upper bound for uint16x4_t
+	vcopy_lane_u16(arg_u16x4, -1, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u16(arg_u16x4, 4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 3); // second immediate: upper bound for uint16x4_t
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u16(arg_u16x4, 0, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 0);
+	vcopyq_lane_u16(arg_u16x8, 7, arg_u16x4, 0); // first immediate: upper bound for uint16x8_t
+	vcopyq_lane_u16(arg_u16x8, -1, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u16(arg_u16x8, 8, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 3); // second immediate still bounded by uint16x4_t
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u16(arg_u16x8, 0, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 0);
+	vcopy_laneq_u16(arg_u16x4, 3, arg_u16x8, 0);
+	vcopy_laneq_u16(arg_u16x4, -1, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u16(arg_u16x4, 4, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 7); // second immediate: upper bound for uint16x8_t
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u16(arg_u16x4, 0, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 0);
+	vcopyq_laneq_u16(arg_u16x8, 7, arg_u16x8, 0);
+	vcopyq_laneq_u16(arg_u16x8, -1, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u16(arg_u16x8, 8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 7);
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u16(arg_u16x8, 0, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vcopy lane/laneq on u32: both immediates are range checked, each against the vector argument that precedes it.
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 0); // accepted: both immediates at the lower bound
+	vcopy_lane_u32(arg_u32x2, 1, arg_u32x2, 0); // first immediate: upper bound for uint32x2_t
+	vcopy_lane_u32(arg_u32x2, -1, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u32(arg_u32x2, 2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 1); // second immediate: upper bound for uint32x2_t
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u32(arg_u32x2, 0, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 0);
+	vcopyq_lane_u32(arg_u32x4, 3, arg_u32x2, 0); // first immediate: upper bound for uint32x4_t
+	vcopyq_lane_u32(arg_u32x4, -1, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u32(arg_u32x4, 4, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 1); // second immediate still bounded by uint32x2_t
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u32(arg_u32x4, 0, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 0);
+	vcopy_laneq_u32(arg_u32x2, 1, arg_u32x4, 0);
+	vcopy_laneq_u32(arg_u32x2, -1, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u32(arg_u32x2, 2, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 3); // second immediate: upper bound for uint32x4_t
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u32(arg_u32x2, 0, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 0);
+	vcopyq_laneq_u32(arg_u32x4, 3, arg_u32x4, 0);
+	vcopyq_laneq_u32(arg_u32x4, -1, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u32(arg_u32x4, 4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 3);
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u32(arg_u32x4, 0, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_u64(uint64x2_t arg_u64x2, uint64x1_t arg_u64x1) { // Range checks for both index immediates of vcopy{,q}_lane{,q}_u64: in-range boundary values are left unannotated, -1 and N are annotated as out-of-range errors; N is 1 for a uint64x1_t operand (only 0 is accepted) and 2 for a uint64x2_t operand.
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, 0);
+	vcopy_lane_u64(arg_u64x1, -1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u64(arg_u64x1, 1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_u64(arg_u64x1, 0, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, 0);
+	vcopyq_lane_u64(arg_u64x2, 1, arg_u64x1, 0);
+	vcopyq_lane_u64(arg_u64x2, -1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u64(arg_u64x2, 2, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_u64(arg_u64x2, 0, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 0);
+	vcopy_laneq_u64(arg_u64x1, -1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u64(arg_u64x1, 1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 1);
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_u64(arg_u64x1, 0, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 0);
+	vcopyq_laneq_u64(arg_u64x2, 1, arg_u64x2, 0);
+	vcopyq_laneq_u64(arg_u64x2, -1, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u64(arg_u64x2, 2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 1);
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_u64(arg_u64x2, 0, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p64(poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) { // Range checks for both index immediates of vcopy{,q}_lane{,q}_p64: in-range boundary values are left unannotated, -1 and N are annotated as out-of-range errors; N is 1 for a poly64x1_t operand (only 0 is accepted) and 2 for a poly64x2_t operand.
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, 0);
+	vcopy_lane_p64(arg_p64x1, -1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p64(arg_p64x1, 1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p64(arg_p64x1, 0, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, 0);
+	vcopyq_lane_p64(arg_p64x2, 1, arg_p64x1, 0);
+	vcopyq_lane_p64(arg_p64x2, -1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p64(arg_p64x2, 2, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p64(arg_p64x2, 0, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 0);
+	vcopy_laneq_p64(arg_p64x1, -1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p64(arg_p64x1, 1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 1);
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p64(arg_p64x1, 0, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 0);
+	vcopyq_laneq_p64(arg_p64x2, 1, arg_p64x2, 0);
+	vcopyq_laneq_p64(arg_p64x2, -1, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p64(arg_p64x2, 2, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 1);
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p64(arg_p64x2, 0, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Range checks for both index immediates of vcopy{,q}_lane{,q}_f32: in-range boundary values (0 and N-1) are left unannotated, -1 and N are annotated as out-of-range errors; N is 2 for a float32x2_t operand and 4 for a float32x4_t operand.
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 0);
+	vcopy_lane_f32(arg_f32x2, 1, arg_f32x2, 0);
+	vcopy_lane_f32(arg_f32x2, -1, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f32(arg_f32x2, 2, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 1);
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f32(arg_f32x2, 0, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 0);
+	vcopyq_lane_f32(arg_f32x4, 3, arg_f32x2, 0);
+	vcopyq_lane_f32(arg_f32x4, -1, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f32(arg_f32x4, 4, arg_f32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 1);
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f32(arg_f32x4, 0, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 0);
+	vcopy_laneq_f32(arg_f32x2, 1, arg_f32x4, 0);
+	vcopy_laneq_f32(arg_f32x2, -1, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f32(arg_f32x2, 2, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 3);
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f32(arg_f32x2, 0, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 0);
+	vcopyq_laneq_f32(arg_f32x4, 3, arg_f32x4, 0);
+	vcopyq_laneq_f32(arg_f32x4, -1, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f32(arg_f32x4, 4, arg_f32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 3);
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f32(arg_f32x4, 0, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Range checks for both index immediates of vcopy{,q}_lane{,q}_f64: in-range boundary values are left unannotated, -1 and N are annotated as out-of-range errors; N is 1 for a float64x1_t operand (only 0 is accepted) and 2 for a float64x2_t operand.
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, 0);
+	vcopy_lane_f64(arg_f64x1, -1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f64(arg_f64x1, 1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_f64(arg_f64x1, 0, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, 0);
+	vcopyq_lane_f64(arg_f64x2, 1, arg_f64x1, 0);
+	vcopyq_lane_f64(arg_f64x2, -1, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f64(arg_f64x2, 2, arg_f64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_f64(arg_f64x2, 0, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 0);
+	vcopy_laneq_f64(arg_f64x1, -1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f64(arg_f64x1, 1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 1);
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_f64(arg_f64x1, 0, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 0);
+	vcopyq_laneq_f64(arg_f64x2, 1, arg_f64x2, 0);
+	vcopyq_laneq_f64(arg_f64x2, -1, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f64(arg_f64x2, 2, arg_f64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 1);
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_f64(arg_f64x2, 0, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) { // Range checks for both index immediates of vcopy{,q}_lane{,q}_p8: in-range boundary values (0 and N-1) are left unannotated, -1 and N are annotated as out-of-range errors; N is 8 for a poly8x8_t operand and 16 for a poly8x16_t operand.
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 0);
+	vcopy_lane_p8(arg_p8x8, 7, arg_p8x8, 0);
+	vcopy_lane_p8(arg_p8x8, -1, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p8(arg_p8x8, 8, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 7);
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p8(arg_p8x8, 0, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 0);
+	vcopyq_lane_p8(arg_p8x16, 15, arg_p8x8, 0);
+	vcopyq_lane_p8(arg_p8x16, -1, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p8(arg_p8x16, 16, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 7);
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p8(arg_p8x16, 0, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 0);
+	vcopy_laneq_p8(arg_p8x8, 7, arg_p8x16, 0);
+	vcopy_laneq_p8(arg_p8x8, -1, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p8(arg_p8x8, 8, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 15);
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p8(arg_p8x8, 0, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 0);
+	vcopyq_laneq_p8(arg_p8x16, 15, arg_p8x16, 0);
+	vcopyq_laneq_p8(arg_p8x16, -1, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p8(arg_p8x16, 16, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 15);
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p8(arg_p8x16, 0, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_copy_vector_lane_p16(poly16x8_t arg_p16x8, poly16x4_t arg_p16x4) { // Range checks for both index immediates of vcopy{,q}_lane{,q}_p16: in-range boundary values (0 and N-1) are left unannotated, -1 and N are annotated as out-of-range errors; N is 4 for a poly16x4_t operand and 8 for a poly16x8_t operand.
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 0);
+	vcopy_lane_p16(arg_p16x4, 3, arg_p16x4, 0);
+	vcopy_lane_p16(arg_p16x4, -1, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p16(arg_p16x4, 4, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 3);
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_p16(arg_p16x4, 0, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 0);
+	vcopyq_lane_p16(arg_p16x8, 7, arg_p16x4, 0);
+	vcopyq_lane_p16(arg_p16x8, -1, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p16(arg_p16x8, 8, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 3);
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_p16(arg_p16x8, 0, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 0);
+	vcopy_laneq_p16(arg_p16x4, 3, arg_p16x8, 0);
+	vcopy_laneq_p16(arg_p16x4, -1, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p16(arg_p16x4, 4, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 7);
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_p16(arg_p16x4, 0, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 0);
+	vcopyq_laneq_p16(arg_p16x8, 7, arg_p16x8, 0);
+	vcopyq_laneq_p16(arg_p16x8, -1, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p16(arg_p16x8, 8, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 7);
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_p16(arg_p16x8, 0, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c b/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
new file mode 100644
index 00000000000000..553ee096e490c7
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.2a -target-feature +dotprod -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// s32 variant is tested under clang/test/CodeGen/arm-neon-range-checks.c
+void test_dot_product_u32(uint8x8_t arg_u8x8, uint32x2_t arg_u32x2, uint8x16_t arg_u8x16, uint32x4_t arg_u32x4) { // Range checks for the trailing index immediate of vdot{,q}_lane{,q}_u32: 0..1 is accepted when the last vector operand is uint8x8_t and 0..3 when it is uint8x16_t; -1 and the first value past the upper bound are annotated as out-of-range errors.
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 0);
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 1);
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 0);
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 3);
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_laneq_u32(arg_u32x4, arg_u8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 0);
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 3);
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_laneq_u32(arg_u32x2, arg_u8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 0);
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 1);
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_lane_u32(arg_u32x4, arg_u8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
new file mode 100644
index 00000000000000..5738f5ad27f3e8
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-elt-from-vector.c
@@ -0,0 +1,301 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_extract_one_element_from_vector_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8) { // Range checks for the index immediate of vdupb_lane{,q}_s8 and vget{,q}_lane_s8: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 8 for int8x8_t and 16 for int8x16_t.
+	vdupb_lane_s8(arg_i8x8, 0);
+	vdupb_lane_s8(arg_i8x8, 7);
+	vdupb_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_s8(arg_i8x16, 0);
+	vdupb_laneq_s8(arg_i8x16, 15);
+	vdupb_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s8(arg_i8x8, 0);
+	vget_lane_s8(arg_i8x8, 7);
+	vget_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s8(arg_i8x16, 0);
+	vgetq_lane_s8(arg_i8x16, 15);
+	vgetq_lane_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Range checks for the index immediate of vduph_lane{,q}_s16 and vget{,q}_lane_s16: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 4 for int16x4_t and 8 for int16x8_t.
+	vduph_lane_s16(arg_i16x4, 0);
+	vduph_lane_s16(arg_i16x4, 3);
+	vduph_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_s16(arg_i16x8, 0);
+	vduph_laneq_s16(arg_i16x8, 7);
+	vduph_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s16(arg_i16x4, 0);
+	vget_lane_s16(arg_i16x4, 3);
+	vget_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s16(arg_i16x8, 0);
+	vgetq_lane_s16(arg_i16x8, 7);
+	vgetq_lane_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2) { // Range checks for the index immediate of vdups_lane{,q}_s32 and vget{,q}_lane_s32: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 2 for int32x2_t and 4 for int32x4_t.
+	vdups_lane_s32(arg_i32x2, 0);
+	vdups_lane_s32(arg_i32x2, 1);
+	vdups_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_s32(arg_i32x4, 0);
+	vdups_laneq_s32(arg_i32x4, 3);
+	vdups_laneq_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s32(arg_i32x2, 0);
+	vget_lane_s32(arg_i32x2, 1);
+	vget_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s32(arg_i32x4, 0);
+	vgetq_lane_s32(arg_i32x4, 3);
+	vgetq_lane_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1) { // Range checks for the index immediate of vdupd_lane{,q}_s64 and vget{,q}_lane_s64: in-range boundary values are left unannotated, -1 and N are annotated as out-of-range errors; N is 1 for int64x1_t (only 0 is accepted) and 2 for int64x2_t.
+	vdupd_lane_s64(arg_i64x1, 0);
+	vdupd_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_s64(arg_i64x2, 0);
+	vdupd_laneq_s64(arg_i64x2, 1);
+	vdupd_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_s64(arg_i64x1, 0);
+	vget_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_s64(arg_i64x2, 0);
+	vgetq_lane_s64(arg_i64x2, 1);
+	vgetq_lane_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // Range checks for the index immediate of vdupb_lane{,q}_u8 and vget{,q}_lane_u8: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 8 for uint8x8_t and 16 for uint8x16_t.
+	vdupb_lane_u8(arg_u8x8, 0);
+	vdupb_lane_u8(arg_u8x8, 7);
+	vdupb_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_u8(arg_u8x16, 0);
+	vdupb_laneq_u8(arg_u8x16, 15);
+	vdupb_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u8(arg_u8x8, 0);
+	vget_lane_u8(arg_u8x8, 7);
+	vget_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u8(arg_u8x16, 0);
+	vgetq_lane_u8(arg_u8x16, 15);
+	vgetq_lane_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Range checks for the index immediate of vduph_lane{,q}_u16 and vget{,q}_lane_u16: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 4 for uint16x4_t and 8 for uint16x8_t.
+	vduph_lane_u16(arg_u16x4, 0);
+	vduph_lane_u16(arg_u16x4, 3);
+	vduph_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_u16(arg_u16x8, 0);
+	vduph_laneq_u16(arg_u16x8, 7);
+	vduph_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u16(arg_u16x4, 0);
+	vget_lane_u16(arg_u16x4, 3);
+	vget_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u16(arg_u16x8, 0);
+	vgetq_lane_u16(arg_u16x8, 7);
+	vgetq_lane_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Range checks for the index immediate of vdups_lane{,q}_u32 and vget{,q}_lane_u32: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 2 for uint32x2_t and 4 for uint32x4_t.
+	vdups_lane_u32(arg_u32x2, 0);
+	vdups_lane_u32(arg_u32x2, 1);
+	vdups_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_u32(arg_u32x4, 0);
+	vdups_laneq_u32(arg_u32x4, 3);
+	vdups_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u32(arg_u32x2, 0);
+	vget_lane_u32(arg_u32x2, 1);
+	vget_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u32(arg_u32x4, 0);
+	vgetq_lane_u32(arg_u32x4, 3);
+	vgetq_lane_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) { // Range checks for the index immediate of vdupd_lane{,q}_u64 and vget{,q}_lane_u64: in-range boundary values are left unannotated, -1 and N are annotated as out-of-range errors; N is 1 for uint64x1_t (only 0 is accepted) and 2 for uint64x2_t.
+	vdupd_lane_u64(arg_u64x1, 0);
+	vdupd_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_u64(arg_u64x2, 0);
+	vdupd_laneq_u64(arg_u64x2, 1);
+	vdupd_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_u64(arg_u64x1, 0);
+	vget_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_u64(arg_u64x2, 0);
+	vgetq_lane_u64(arg_u64x2, 1);
+	vgetq_lane_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) { // Range checks for the index immediate of vdups_lane{,q}_f32 and vget{,q}_lane_f32: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 2 for float32x2_t and 4 for float32x4_t.
+	vdups_lane_f32(arg_f32x2, 0);
+	vdups_lane_f32(arg_f32x2, 1);
+	vdups_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdups_laneq_f32(arg_f32x4, 0);
+	vdups_laneq_f32(arg_f32x4, 3);
+	vdups_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdups_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_f32(arg_f32x2, 0);
+	vget_lane_f32(arg_f32x2, 1);
+	vget_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f32(arg_f32x4, 0);
+	vgetq_lane_f32(arg_f32x4, 3);
+	vgetq_lane_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Range checks for the index immediate of vdupd_lane{,q}_f64 and vget{,q}_lane_f64: in-range boundary values are left unannotated, -1 and N are annotated as out-of-range errors; N is 1 for float64x1_t (only 0 is accepted) and 2 for float64x2_t.
+	vdupd_lane_f64(arg_f64x1, 0);
+	vdupd_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupd_laneq_f64(arg_f64x2, 0);
+	vdupd_laneq_f64(arg_f64x2, 1);
+	vdupd_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupd_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_f64(arg_f64x1, 0);
+	vget_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f64(arg_f64x2, 0);
+	vgetq_lane_f64(arg_f64x2, 1);
+	vgetq_lane_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) { // Range checks for the index immediate of vdupb_lane{,q}_p8 and vget{,q}_lane_p8: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 8 for poly8x8_t and 16 for poly8x16_t.
+	vdupb_lane_p8(arg_p8x8, 0);
+	vdupb_lane_p8(arg_p8x8, 7);
+	vdupb_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupb_laneq_p8(arg_p8x16, 0);
+	vdupb_laneq_p8(arg_p8x16, 15);
+	vdupb_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupb_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_p8(arg_p8x8, 0);
+	vget_lane_p8(arg_p8x8, 7);
+	vget_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p8(arg_p8x16, 0);
+	vgetq_lane_p8(arg_p8x16, 15);
+	vgetq_lane_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) { // Range checks for the index immediate of vduph_lane{,q}_p16 and vget{,q}_lane_p16: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 4 for poly16x4_t and 8 for poly16x8_t.
+	vduph_lane_p16(arg_p16x4, 0);
+	vduph_lane_p16(arg_p16x4, 3);
+	vduph_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vduph_laneq_p16(arg_p16x8, 0);
+	vduph_laneq_p16(arg_p16x8, 7);
+	vduph_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vduph_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vget_lane_p16(arg_p16x4, 0);
+	vget_lane_p16(arg_p16x4, 3);
+	vget_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p16(arg_p16x8, 0);
+	vgetq_lane_p16(arg_p16x8, 7);
+	vgetq_lane_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) { // Range checks for the index immediate of vget_lane_p64 and vgetq_lane_p64: in-range boundary values are left unannotated, -1 and N are annotated as out-of-range errors; N is 1 for poly64x1_t (only 0 is accepted) and 2 for poly64x2_t.
+	vget_lane_p64(arg_p64x1, 0);
+	vget_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_p64(arg_p64x2, 0);
+	vgetq_lane_p64(arg_p64x2, 1);
+	vgetq_lane_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_one_element_from_vector_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Range checks for the index immediate of vget_lane_f16 and vgetq_lane_f16: 0 and N-1 are left unannotated, -1 and N are annotated as out-of-range errors; N is 4 for float16x4_t and 8 for float16x8_t.
+	vget_lane_f16(arg_f16x4, 0);
+	vget_lane_f16(arg_f16x4, 3);
+	vget_lane_f16(arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vget_lane_f16(arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vgetq_lane_f16(arg_f16x8, 0);
+	vgetq_lane_f16(arg_f16x8, 7);
+	vgetq_lane_f16(arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vgetq_lane_f16(arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
new file mode 100644
index 00000000000000..a17df47eb98eaf
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vext_s8, vextq_s8, vext_u8, vextq_u8, vext_p8, vextq_p8 are tested under
+// clang/test/Sema/aarch64-neon-ranges.c
+
+void test_extract_vector_from_a_pair_of_vectors_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4) { // Boundary checks for the trailing immediate: vext_s16 takes 0 and 3 (rejects -1, 4); vextq_s16 takes 0 and 7 (rejects -1, 8).
+	vext_s16(arg_i16x4, arg_i16x4, 0);
+	vext_s16(arg_i16x4, arg_i16x4, 3);
+	vext_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s16(arg_i16x8, arg_i16x8, 0);
+	vextq_s16(arg_i16x8, arg_i16x8, 7);
+	vextq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Boundary checks for the trailing immediate: vext_s32 takes 0 and 1 (rejects -1, 2); vextq_s32 takes 0 and 3 (rejects -1, 4).
+	vext_s32(arg_i32x2, arg_i32x2, 0);
+	vext_s32(arg_i32x2, arg_i32x2, 1);
+	vext_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s32(arg_i32x4, arg_i32x4, 0);
+	vextq_s32(arg_i32x4, arg_i32x4, 3);
+	vextq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_s64(int64x2_t arg_i64x2, int64x1_t arg_i64x1) { // Boundary checks for the trailing immediate: vext_s64 takes 0 (rejects -1, 1); vextq_s64 takes 0 and 1 (rejects -1, 2).
+	vext_s64(arg_i64x1, arg_i64x1, 0);
+	vext_s64(arg_i64x1, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s64(arg_i64x1, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s64(arg_i64x2, arg_i64x2, 0);
+	vextq_s64(arg_i64x2, arg_i64x2, 1);
+	vextq_s64(arg_i64x2, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s64(arg_i64x2, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // Boundary checks for the trailing immediate: vext_u16 takes 0 and 3 (rejects -1, 4); vextq_u16 takes 0 and 7 (rejects -1, 8).
+	vext_u16(arg_u16x4, arg_u16x4, 0);
+	vext_u16(arg_u16x4, arg_u16x4, 3);
+	vext_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u16(arg_u16x8, arg_u16x8, 0);
+	vextq_u16(arg_u16x8, arg_u16x8, 7);
+	vextq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Boundary checks for the trailing immediate: vext_u32 takes 0 and 1 (rejects -1, 2); vextq_u32 takes 0 and 3 (rejects -1, 4).
+	vext_u32(arg_u32x2, arg_u32x2, 0);
+	vext_u32(arg_u32x2, arg_u32x2, 1);
+	vext_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u32(arg_u32x4, arg_u32x4, 0);
+	vextq_u32(arg_u32x4, arg_u32x4, 3);
+	vextq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) { // Boundary checks for the trailing immediate: vext_u64 takes 0 (rejects -1, 1); vextq_u64 takes 0 and 1 (rejects -1, 2).
+	vext_u64(arg_u64x1, arg_u64x1, 0);
+	vext_u64(arg_u64x1, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u64(arg_u64x1, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u64(arg_u64x2, arg_u64x2, 0);
+	vextq_u64(arg_u64x2, arg_u64x2, 1);
+	vextq_u64(arg_u64x2, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u64(arg_u64x2, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) { // Boundary checks for the trailing immediate: vext_p64 takes 0 (rejects -1, 1); vextq_p64 takes 0 and 1 (rejects -1, 2).
+	vext_p64(arg_p64x1, arg_p64x1, 0);
+	vext_p64(arg_p64x1, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p64(arg_p64x1, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p64(arg_p64x2, arg_p64x2, 0);
+	vextq_p64(arg_p64x2, arg_p64x2, 1);
+	vextq_p64(arg_p64x2, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p64(arg_p64x2, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Boundary checks for the trailing immediate: vext_f32 takes 0 and 1 (rejects -1, 2); vextq_f32 takes 0 and 3 (rejects -1, 4).
+	vext_f32(arg_f32x2, arg_f32x2, 0);
+	vext_f32(arg_f32x2, arg_f32x2, 1);
+	vext_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_f32(arg_f32x4, arg_f32x4, 0);
+	vextq_f32(arg_f32x4, arg_f32x4, 3);
+	vextq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Boundary checks for the trailing immediate: vext_f64 takes 0 (rejects -1, 1); vextq_f64 takes 0 and 1 (rejects -1, 2).
+	vext_f64(arg_f64x1, arg_f64x1, 0);
+	vext_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_f64(arg_f64x2, arg_f64x2, 0);
+	vextq_f64(arg_f64x2, arg_f64x2, 1);
+	vextq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_extract_vector_from_a_pair_of_vectors_p16(poly16x8_t arg_p16x8, poly16x4_t arg_p16x4) { // Boundary checks for the trailing immediate: vext_p16 takes 0 and 3 (rejects -1, 4); vextq_p16 takes 0 and 7 (rejects -1, 8).
+	vext_p16(arg_p16x4, arg_p16x4, 0);
+	vext_p16(arg_p16x4, arg_p16x4, 3);
+	vext_p16(arg_p16x4, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p16(arg_p16x4, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p16(arg_p16x8, arg_p16x8, 0);
+	vextq_p16(arg_p16x8, arg_p16x8, 7);
+	vextq_p16(arg_p16x8, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p16(arg_p16x8, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
new file mode 100644
index 00000000000000..6f83169498fb76
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +v8.2a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+// REQUIRES: aarch64-registered-target
+
+// vcvth_n_f16_s16, vcvth_n_f16_s32, vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32
+// vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16, vcvth_n_u32_f16
+// are tested under clang/test/Sema/aarch64-neon-fp16-ranges.c
+
+void test_conversions_u64(uint64_t arg_u64) { // Boundary checks for the trailing immediate of vcvth_n_f16_u64: 1 and 16 are accepted; 0 and 17 must be diagnosed.
+	vcvth_n_f16_u64(arg_u64, 1);
+	vcvth_n_f16_u64(arg_u64, 16);
+	vcvth_n_f16_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_u64(arg_u64, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_f16(float16_t arg_f16) { // Boundary checks for the trailing immediate of vcvth_n_u64_f16: 1 and 16 are accepted; 0 and 17 must be diagnosed.
+	vcvth_n_u64_f16(arg_f16, 1);
+	vcvth_n_u64_f16(arg_f16, 16);
+	vcvth_n_u64_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_u64_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
new file mode 100644
index 00000000000000..d31cf321d76196
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-v84.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.4a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_fused_multiply_accumulate_f16(float32x2_t arg_f32x2, float32x4_t arg_f32x4, float16x4_t arg_f16x4, float16x8_t arg_f16x8) { // Boundary checks for the lane immediate of vfmlal/vfmlsl: every _lane_ form takes 0 and 3 (rejects -1, 4); every _laneq_ form takes 0 and 7 (rejects -1, 8). The '+' match count in each directive below accepts one or more matching diagnostics on that line.
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_lane_low_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_laneq_low_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_lane_low_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_laneq_low_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 0);
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 3);
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_lane_high_f16(arg_f32x2, arg_f16x4, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 0);
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 3);
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_lane_high_f16(arg_f32x4, arg_f16x8, arg_f16x4, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlal_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 0);
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 7);
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlsl_laneq_high_f16(arg_f32x2, arg_f16x4, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlalq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 0);
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 7);
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vfmlslq_laneq_high_f16(arg_f32x4, arg_f16x8, arg_f16x8, 8); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
new file mode 100644
index 00000000000000..6460018b744086
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-vector.c
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +v8.2a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+// REQUIRES: aarch64-registered-target
+
+// vcvtq_n_f16_u16 is tested under clang/test/Sema/arm-mve-immediates.c
+
+void test_multiplication_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Boundary checks for the lane immediate of vmul{,q,h}: every _lane_ form takes 0 and 3 (rejects -1, 4); every _laneq_ form takes 0 and 7 (rejects -1, 8).
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 0);
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 3);
+	vmul_lane_f16(arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f16(arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 0);
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 3);
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f16(arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 0);
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 7);
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f16(arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 0);
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 7);
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f16(arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulh_lane_f16(arg_f16, arg_f16x4, 0);
+	vmulh_lane_f16(arg_f16, arg_f16x4, 3);
+	vmulh_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulh_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 0);
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 7);
+	vmulh_laneq_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulh_laneq_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_multiply_extended_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Boundary checks for the lane immediate of vmulx{,q,h}: every _lane_ form takes 0 and 3 (rejects -1, 4); every _laneq_ form takes 0 and 7 (rejects -1, 8).
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 0);
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 3);
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f16(arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 0);
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 3);
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f16(arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 0);
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 7);
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f16(arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 0);
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 7);
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f16(arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 0);
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 3);
+	vmulxh_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxh_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 0);
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 7);
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxh_laneq_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_fused_multiply_accumulate_f16(float16_t arg_f16, float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Boundary checks for the lane immediate of vfma/vfms{,q,h}: every _lane_ form takes 0 and 3 (rejects -1, 4); every _laneq_ form takes 0 and 7 (rejects -1, 8).
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 0);
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 3);
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 0);
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 3);
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 0);
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 7);
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 0);
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 7);
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 0);
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 3);
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmah_lane_f16(arg_f16, arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 0);
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 7);
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmah_laneq_f16(arg_f16, arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 0);
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 3);
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f16(arg_f16x4, arg_f16x4, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 0);
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 3);
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f16(arg_f16x8, arg_f16x8, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 0);
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 7);
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f16(arg_f16x4, arg_f16x4, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 0);
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 7);
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f16(arg_f16x8, arg_f16x8, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 0);
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 3);
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsh_lane_f16(arg_f16, arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 0);
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 7);
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsh_laneq_f16(arg_f16, arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4) { // Boundary checks for the trailing immediate of vcvt_n_f16_s16 and vcvtq_n_f16_s16: 1 and 16 are accepted; 0 and 17 must be diagnosed.
+	vcvt_n_f16_s16(arg_i16x4, 1);
+	vcvt_n_f16_s16(arg_i16x4, 16);
+	vcvt_n_f16_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f16_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_f16_s16(arg_i16x8, 1);
+	vcvtq_n_f16_s16(arg_i16x8, 16);
+	vcvtq_n_f16_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_f16_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Boundary checks for the trailing immediate of vcvt_n_f16_u16: 1 and 16 are accepted; 0 and 17 must be diagnosed. arg_u16x8 is not used in this function.
+	vcvt_n_f16_u16(arg_u16x4, 1);
+	vcvt_n_f16_u16(arg_u16x4, 16);
+	vcvt_n_f16_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_f16_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_conversions_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4) { // Boundary checks for the trailing immediate of vcvt{,q}_n_s16_f16 and vcvt{,q}_n_u16_f16: 1 and 16 are accepted; 0 and 17 must be diagnosed.
+	vcvt_n_s16_f16(arg_f16x4, 1);
+	vcvt_n_s16_f16(arg_f16x4, 16);
+	vcvt_n_s16_f16(arg_f16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_s16_f16(arg_f16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_s16_f16(arg_f16x8, 1);
+	vcvtq_n_s16_f16(arg_f16x8, 16);
+	vcvtq_n_s16_f16(arg_f16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_s16_f16(arg_f16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvt_n_u16_f16(arg_f16x4, 1);
+	vcvt_n_u16_f16(arg_f16x4, 16);
+	vcvt_n_u16_f16(arg_f16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvt_n_u16_f16(arg_f16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvtq_n_u16_f16(arg_f16x8, 1);
+	vcvtq_n_u16_f16(arg_f16x8, 16);
+	vcvtq_n_u16_f16(arg_f16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvtq_n_u16_f16(arg_f16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
new file mode 100644
index 00000000000000..1f03ed2264ffc6
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
@@ -0,0 +1,115 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vfma_laneq_f64, vfma_lane_f64, vfmaq_lane_f64 and vfmaq_laneq_f64
+// are tested under clang/test/Sema/aarch64-neon-ranges.c
+
+void test_fused_multiply_accumulate_f32(float32x2_t arg_f32x2, float32_t arg_f32, float32x4_t arg_f32x4) { // Boundary checks for the lane immediate of vfma/vfms{,q,s}: every _lane_ form takes 0 and 1 (rejects -1, 2); every _laneq_ form takes 0 and 3 (rejects -1, 4).
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 0);
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 1);
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmas_lane_f32(arg_f32, arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 0);
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 3);
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmas_laneq_f32(arg_f32, arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 0);
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 1);
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmss_lane_f32(arg_f32, arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 0);
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 3);
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmss_laneq_f32(arg_f32, arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_fused_multiply_accumulate_f64(float64_t arg_f64, float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Boundary checks for the lane immediate of the f64 vfma/vfms forms below: every _lane_ form takes 0 (rejects -1, 1); every _laneq_ form takes 0 and 1 (rejects -1, 2).
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 0);
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 0);
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 1);
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 0);
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 1);
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 0);
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 0);
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, 0);
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsd_lane_f64(arg_f64, arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 0);
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 1);
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfms_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 0);
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 1);
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 0);
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 1);
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmsd_laneq_f64(arg_f64, arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c b/clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
new file mode 100644
index 00000000000000..dd501b84bae47a
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/matrix-multiplication.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.6a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_dot_product_s32(int8x8_t arg_i8x8, int32x2_t arg_i32x2, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8,
+						  int32x4_t arg_i32x4, int8x16_t arg_i8x16) { // Lane-immediate bounds of the vusdot/vsudot lane and laneq intrinsics.
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 0); // *_lane forms (indexed operand has 8 elements): 0 and 1 pass, -1 and 2 must be rejected.
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 1); // NOTE(review): the bound is below the element count of the indexed vector; presumably the index selects a group of four bytes - confirm against ACLE.
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdot_lane_s32(arg_i32x2, arg_u8x8, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 0);
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 1);
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudot_lane_s32(arg_i32x2, arg_i8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 0); // *_laneq forms (indexed operand has 16 elements): 0 and 3 pass, -1 and 4 must be rejected.
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 3);
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdot_laneq_s32(arg_i32x2, arg_u8x8, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 0);
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 3);
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudot_laneq_s32(arg_i32x2, arg_i8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 0); // q-result variants: the indexed operand is an 8-element vector again, upper bound tested is 1.
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 1);
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdotq_lane_s32(arg_i32x4, arg_u8x16, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 0);
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 1);
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudotq_lane_s32(arg_i32x4, arg_i8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 0); // q-result variants indexing a 16-element vector: upper bound tested is 3.
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 3);
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vusdotq_laneq_s32(arg_i32x4, arg_u8x16, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 0);
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 3);
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsudotq_laneq_s32(arg_i32x4, arg_i8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c b/clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
new file mode 100644
index 00000000000000..8c679e7e6a7d95
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/multiply-extended.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_multiply_extended_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // Lane-index bounds of the f32 vmulx lane/laneq intrinsics.
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 0); // *_lane forms index a float32x2_t: 0 and 1 pass, -1 and 2 must be rejected.
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 1);
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 0);
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 1);
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f32(arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 0);
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 1);
+	vmulxs_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxs_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 0); // *_laneq forms index a float32x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 3);
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f32(arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 0);
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 3);
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 0);
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 3);
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxs_laneq_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_multiply_extended_f64(float64_t arg_f64, float64x2_t arg_f64x2, float64x1_t arg_f64x1) { // Lane-index bounds of the f64 vmulx lane/laneq intrinsics.
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, 0); // *_lane forms index a float64x1_t: only 0 passes. The next two lines use a bare '+' count (one or more matching errors); NOTE(review): presumably this intrinsic reports the bad index more than once - confirm.
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+	vmulx_lane_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, 0); // From here on every rejection is matched exactly once again.
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_lane_f64(arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxd_lane_f64(arg_f64, arg_f64x1, 0);
+	vmulxd_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxd_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 0); // *_laneq forms index a float64x2_t: 0 and 1 pass, -1 and 2 must be rejected.
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 1);
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulx_laneq_f64(arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 0);
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 1);
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxq_laneq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 0);
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 1);
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulxd_laneq_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
new file mode 100644
index 00000000000000..4c3dfd0e65a08d
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vqdmlal_lane_s32, vqdmlal_high_lane_s32, vqdmlsl_high_lane_s32, vqdmlsl_laneq_s32,
+// vqdmlsls_laneq_s32 and vqdmlsl_high_laneq_s32 are tested under arm-neon-range-checks.c.
+
+void test_saturating_multiply_accumulate_s16(int16x4_t arg_i16x4, int32_t arg_i32, int16_t arg_i16,
+											 int32x4_t arg_i32x4, int16x8_t arg_i16x8) { // Lane-index bounds of the 16-bit vqdmlal/vqdmlsl lane and laneq intrinsics.
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0); // *_lane forms index an int16x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 0);
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 3);
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlalh_lane_s16(arg_i32, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0); // *_laneq forms index an int16x8_t: 0 and 7 pass, -1 and 8 must be rejected.
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 0);
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 7);
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlalh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0); // vqdmlsl family: same sequence and same bounds as the vqdmlal calls above.
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 0);
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 3);
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlslh_lane_s16(arg_i32, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 0);
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 7);
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlslh_laneq_s16(arg_i32, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_accumulate_s32(int32x2_t arg_i32x2, int64x2_t arg_i64x2, int32_t arg_i32,
+											 int32x4_t arg_i32x4, int64_t arg_i64) { // Lane-index bounds of the scalar vqdmlals/vqdmlsls lane intrinsics; arg_i64x2 is not used by any call below.
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 0); // Indexes an int32x2_t: 0 and 1 pass, -1 and 2 must be rejected.
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 0); // Indexes an int32x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 3);
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 0); // Subtracting counterpart of vqdmlals_lane_s32, same 0..1 bound.
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
new file mode 100644
index 00000000000000..a72d1950389dd5
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
@@ -0,0 +1,136 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// clang/Sema/arm-neon-range-checks.c includes tests for:
+// vqdmull_lane_s32, vqdmull_laneq_s32, vqdmull_high_lane_s32, vqdmull_high_laneq_s32,
+// vqdmulh_lane_s32, vqdmulhq_lane_s32, vqdmulh_laneq_s32, vqdmulhq_laneq_s32, vqrdmulh_lane_s32,
+// vqrdmulhq_lane_s32, vqrdmulh_laneq_s32, vqrdmulhq_laneq_s32
+
+void test_saturating_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8, int16_t arg_i16) { // Lane-index bounds of the 16-bit vqdmull/vqdmulh/vqrdmulh lane and laneq intrinsics.
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 0); // vqdmull family. *_lane forms index an int16x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmullh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 0); // *_laneq forms index an int16x8_t: 0 and 7 pass, -1 and 8 must be rejected.
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmullh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 0); // vqdmulh family: lane forms bounded by 3, laneq forms by 7, as above.
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 0); // vqrdmulh family: lane forms bounded by 3, laneq forms by 7, as above.
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 0);
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 3);
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhh_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 0);
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 7);
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhh_laneq_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_by_scalar_and_widen_s32(int32x4_t arg_i32x4, int32_t arg_i32, int32x2_t arg_i32x2) { // Lane-index bounds of the scalar 32-bit vqdmulls/vqdmulhs/vqrdmulhs lane and laneq intrinsics.
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 0); // *_lane forms index an int32x2_t: 0 and 1 pass, -1 and 2 must be rejected.
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 1);
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulls_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 0); // *_laneq forms index an int32x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 0); // vqdmulhs pair: same bounds as the vqdmulls pair above.
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 0); // vqrdmulhs pair: same bounds again.
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c b/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
new file mode 100644
index 00000000000000..ea1fcd5dc03221
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
@@ -0,0 +1,277 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vdup_lane_s32, vdupq_lane_s32, vdup_laneq_s32, vdupq_laneq_s32 are tested
+// under clang/test/CodeGen/arm-neon-range-checks.c
+
+void test_set_all_lanes_to_the_same_value_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // Lane-index bounds of vdup lane/laneq on signed 8-bit vectors.
+	vdup_lane_s8(arg_i8x8, 0); // *_lane forms index an int8x8_t: 0 and 7 pass, -1 and 8 must be rejected.
+	vdup_lane_s8(arg_i8x8, 7);
+	vdup_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s8(arg_i8x8, 0);
+	vdupq_lane_s8(arg_i8x8, 7);
+	vdupq_lane_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s8(arg_i8x16, 0); // *_laneq forms index an int8x16_t: 0 and 15 pass, -1 and 16 must be rejected.
+	vdup_laneq_s8(arg_i8x16, 15);
+	vdup_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s8(arg_i8x16, 0);
+	vdupq_laneq_s8(arg_i8x16, 15);
+	vdupq_laneq_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s8(arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Lane-index bounds of vdup lane/laneq on signed 16-bit vectors.
+	vdup_lane_s16(arg_i16x4, 0); // *_lane forms index an int16x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vdup_lane_s16(arg_i16x4, 3);
+	vdup_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s16(arg_i16x4, 0);
+	vdupq_lane_s16(arg_i16x4, 3);
+	vdupq_lane_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s16(arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s16(arg_i16x8, 0); // *_laneq forms index an int16x8_t: 0 and 7 pass, -1 and 8 must be rejected.
+	vdup_laneq_s16(arg_i16x8, 7);
+	vdup_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s16(arg_i16x8, 0);
+	vdupq_laneq_s16(arg_i16x8, 7);
+	vdupq_laneq_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s16(arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+
+void test_set_all_lanes_to_the_same_value_s64(int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Lane-index bounds of vdup lane/laneq on signed 64-bit vectors.
+	vdup_lane_s64(arg_i64x1, 0); // *_lane forms index an int64x1_t: only 0 passes, -1 and 1 must be rejected.
+	vdup_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s64(arg_i64x1, 0);
+	vdupq_lane_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s64(arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s64(arg_i64x2, 0); // *_laneq forms index an int64x2_t: 0 and 1 pass, -1 and 2 must be rejected.
+	vdup_laneq_s64(arg_i64x2, 1);
+	vdup_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s64(arg_i64x2, 0);
+	vdupq_laneq_s64(arg_i64x2, 1);
+	vdupq_laneq_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s64(arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // Lane-index bounds of vdup lane/laneq on unsigned 8-bit vectors.
+	vdup_lane_u8(arg_u8x8, 0); // *_lane forms index a uint8x8_t: 0 and 7 pass, -1 and 8 must be rejected.
+	vdup_lane_u8(arg_u8x8, 7);
+	vdup_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u8(arg_u8x8, 0);
+	vdupq_lane_u8(arg_u8x8, 7);
+	vdupq_lane_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u8(arg_u8x16, 0); // *_laneq forms index a uint8x16_t: 0 and 15 pass, -1 and 16 must be rejected.
+	vdup_laneq_u8(arg_u8x16, 15);
+	vdup_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u8(arg_u8x16, 0);
+	vdupq_laneq_u8(arg_u8x16, 15);
+	vdupq_laneq_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u8(arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // Lane-index bounds of vdup lane/laneq on unsigned 16-bit vectors.
+	vdup_lane_u16(arg_u16x4, 0); // *_lane forms index a uint16x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vdup_lane_u16(arg_u16x4, 3);
+	vdup_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u16(arg_u16x4, 0);
+	vdupq_lane_u16(arg_u16x4, 3);
+	vdupq_lane_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u16(arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u16(arg_u16x8, 0); // *_laneq forms index a uint16x8_t: 0 and 7 pass, -1 and 8 must be rejected.
+	vdup_laneq_u16(arg_u16x8, 7);
+	vdup_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u16(arg_u16x8, 0);
+	vdupq_laneq_u16(arg_u16x8, 7);
+	vdupq_laneq_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u16(arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u32(uint32x4_t arg_u32x4, uint32x2_t arg_u32x2) { // Lane-index bounds of vdup lane/laneq on unsigned 32-bit vectors.
+	vdup_lane_u32(arg_u32x2, 0); // *_lane forms index a uint32x2_t: 0 and 1 pass, -1 and 2 must be rejected.
+	vdup_lane_u32(arg_u32x2, 1);
+	vdup_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u32(arg_u32x2, 0);
+	vdupq_lane_u32(arg_u32x2, 1);
+	vdupq_lane_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u32(arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u32(arg_u32x4, 0); // *_laneq forms index a uint32x4_t: 0 and 3 pass, -1 and 4 must be rejected.
+	vdup_laneq_u32(arg_u32x4, 3);
+	vdup_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u32(arg_u32x4, 0);
+	vdupq_laneq_u32(arg_u32x4, 3);
+	vdupq_laneq_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u32(arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2) { // Lane-index bounds of vdup lane/laneq on unsigned 64-bit vectors.
+	vdup_lane_u64(arg_u64x1, 0); // *_lane forms index a uint64x1_t: only 0 passes, -1 and 1 must be rejected.
+	vdup_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_u64(arg_u64x1, 0);
+	vdupq_lane_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_u64(arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_u64(arg_u64x2, 0); // *_laneq forms index a uint64x2_t: 0 and 1 pass, -1 and 2 must be rejected.
+	vdup_laneq_u64(arg_u64x2, 1);
+	vdup_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_u64(arg_u64x2, 0);
+	vdupq_laneq_u64(arg_u64x2, 1);
+	vdupq_laneq_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_u64(arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p64(poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) {
+	vdup_lane_p64(arg_p64x1, 0);
+	vdup_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p64(arg_p64x1, 0);
+	vdupq_lane_p64(arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p64(arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p64(arg_p64x2, 0);
+	vdup_laneq_p64(arg_p64x2, 1);
+	vdup_laneq_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p64(arg_p64x2, 0);
+	vdupq_laneq_p64(arg_p64x2, 1);
+	vdupq_laneq_p64(arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p64(arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vdup_lane_f32(arg_f32x2, 0);
+	vdup_lane_f32(arg_f32x2, 1);
+	vdup_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_f32(arg_f32x2, 0);
+	vdupq_lane_f32(arg_f32x2, 1);
+	vdupq_lane_f32(arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_f32(arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_f32(arg_f32x4, 0);
+	vdup_laneq_f32(arg_f32x4, 3);
+	vdup_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_f32(arg_f32x4, 0);
+	vdupq_laneq_f32(arg_f32x4, 3);
+	vdupq_laneq_f32(arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_f32(arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vdup_lane_p8(arg_p8x8, 0);
+	vdup_lane_p8(arg_p8x8, 7);
+	vdup_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p8(arg_p8x8, 0);
+	vdupq_lane_p8(arg_p8x8, 7);
+	vdupq_lane_p8(arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p8(arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p8(arg_p8x16, 0);
+	vdup_laneq_p8(arg_p8x16, 15);
+	vdup_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p8(arg_p8x16, 0);
+	vdupq_laneq_p8(arg_p8x16, 15);
+	vdupq_laneq_p8(arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p8(arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) {
+	vdup_lane_p16(arg_p16x4, 0);
+	vdup_lane_p16(arg_p16x4, 3);
+	vdup_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_p16(arg_p16x4, 0);
+	vdupq_lane_p16(arg_p16x4, 3);
+	vdupq_lane_p16(arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_p16(arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_p16(arg_p16x8, 0);
+	vdup_laneq_p16(arg_p16x8, 7);
+	vdup_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_p16(arg_p16x8, 0);
+	vdupq_laneq_p16(arg_p16x8, 7);
+	vdupq_laneq_p16(arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_p16(arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_all_lanes_to_the_same_value_f64(float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+	vdup_lane_f64(arg_f64x1, 0);
+	vdup_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_f64(arg_f64x1, 0);
+	vdupq_lane_f64(arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_f64(arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_f64(arg_f64x2, 0);
+	vdup_laneq_f64(arg_f64x2, 1);
+	vdup_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_f64(arg_f64x2, 0);
+	vdupq_laneq_f64(arg_f64x2, 1);
+	vdupq_laneq_f64(arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_f64(arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c b/clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
new file mode 100644
index 00000000000000..3ab077ed562875
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/set-vector-lane.c
@@ -0,0 +1,162 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vsetq_lane_u8, vsetq_lane_u16, vsetq_lane_u32, vsetq_lane_u64 are
+// tested under clang/test/Sema/arm-mve-immediates.c
+
+void test_set_vector_lane_u8(uint8x16_t arg_u8x16, uint8_t arg_u8, uint8x8_t arg_u8x8) {
+	vset_lane_u8(arg_u8, arg_u8x8, 0);
+	vset_lane_u8(arg_u8, arg_u8x8, 7);
+	vset_lane_u8(arg_u8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u8(arg_u8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u16(uint16x4_t arg_u16x4, uint16_t arg_u16, uint16x8_t arg_u16x8) {
+	vset_lane_u16(arg_u16, arg_u16x4, 0);
+	vset_lane_u16(arg_u16, arg_u16x4, 3);
+	vset_lane_u16(arg_u16, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u16(arg_u16, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4, uint32_t arg_u32) {
+	vset_lane_u32(arg_u32, arg_u32x2, 0);
+	vset_lane_u32(arg_u32, arg_u32x2, 1);
+	vset_lane_u32(arg_u32, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u32(arg_u32, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_u64(uint64x2_t arg_u64x2, uint64x1_t arg_u64x1, uint64_t arg_u64) {
+	vset_lane_u64(arg_u64, arg_u64x1, 0);
+	vset_lane_u64(arg_u64, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_u64(arg_u64, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_set_vector_lane_p64(poly64_t arg_p64, poly64x1_t arg_p64x1, poly64x2_t arg_p64x2) {
+	vset_lane_p64(arg_p64, arg_p64x1, 0);
+	vset_lane_p64(arg_p64, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p64(arg_p64, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p64(arg_p64, arg_p64x2, 0);
+	vsetq_lane_p64(arg_p64, arg_p64x2, 1);
+	vsetq_lane_p64(arg_p64, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p64(arg_p64, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s8(int8x16_t arg_i8x16, int8x8_t arg_i8x8, int8_t arg_i8) {
+	vset_lane_s8(arg_i8, arg_i8x8, 0);
+	vset_lane_s8(arg_i8, arg_i8x8, 7);
+	vset_lane_s8(arg_i8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s8(arg_i8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s8(arg_i8, arg_i8x16, 0);
+	vsetq_lane_s8(arg_i8, arg_i8x16, 15);
+	vsetq_lane_s8(arg_i8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s8(arg_i8, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s16(int16x4_t arg_i16x4, int16_t arg_i16, int16x8_t arg_i16x8) {
+	vset_lane_s16(arg_i16, arg_i16x4, 0);
+	vset_lane_s16(arg_i16, arg_i16x4, 3);
+	vset_lane_s16(arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s16(arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s16(arg_i16, arg_i16x8, 0);
+	vsetq_lane_s16(arg_i16, arg_i16x8, 7);
+	vsetq_lane_s16(arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s16(arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s32(int32_t arg_i32, int32x2_t arg_i32x2, int32x4_t arg_i32x4) {
+	vset_lane_s32(arg_i32, arg_i32x2, 0);
+	vset_lane_s32(arg_i32, arg_i32x2, 1);
+	vset_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s32(arg_i32, arg_i32x4, 0);
+	vsetq_lane_s32(arg_i32, arg_i32x4, 3);
+	vsetq_lane_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) {
+	vset_lane_s64(arg_i64, arg_i64x1, 0);
+	vset_lane_s64(arg_i64, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_s64(arg_i64, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_s64(arg_i64, arg_i64x2, 0);
+	vsetq_lane_s64(arg_i64, arg_i64x2, 1);
+	vsetq_lane_s64(arg_i64, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_s64(arg_i64, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_p8(poly8_t arg_p8, poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) {
+	vset_lane_p8(arg_p8, arg_p8x8, 0);
+	vset_lane_p8(arg_p8, arg_p8x8, 7);
+	vset_lane_p8(arg_p8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p8(arg_p8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p8(arg_p8, arg_p8x16, 0);
+	vsetq_lane_p8(arg_p8, arg_p8x16, 15);
+	vsetq_lane_p8(arg_p8, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p8(arg_p8, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_p16(poly16x4_t arg_p16x4, poly16_t arg_p16, poly16x8_t arg_p16x8) {
+	vset_lane_p16(arg_p16, arg_p16x4, 0);
+	vset_lane_p16(arg_p16, arg_p16x4, 3);
+	vset_lane_p16(arg_p16, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_p16(arg_p16, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_p16(arg_p16, arg_p16x8, 0);
+	vsetq_lane_p16(arg_p16, arg_p16x8, 7);
+	vsetq_lane_p16(arg_p16, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_p16(arg_p16, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f16(float16x8_t arg_f16x8, float16x4_t arg_f16x4, float16_t arg_f16) {
+	vset_lane_f16(arg_f16, arg_f16x4, 0);
+	vset_lane_f16(arg_f16, arg_f16x4, 3);
+	vset_lane_f16(arg_f16, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f16(arg_f16, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f16(arg_f16, arg_f16x8, 0);
+	vsetq_lane_f16(arg_f16, arg_f16x8, 7);
+	vsetq_lane_f16(arg_f16, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f16(arg_f16, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f32(float32x2_t arg_f32x2, float32x4_t arg_f32x4, float32_t arg_f32) {
+	vset_lane_f32(arg_f32, arg_f32x2, 0);
+	vset_lane_f32(arg_f32, arg_f32x2, 1);
+	vset_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f32(arg_f32, arg_f32x4, 0);
+	vsetq_lane_f32(arg_f32, arg_f32x4, 3);
+	vsetq_lane_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_set_vector_lane_f64(float64x1_t arg_f64x1, float64x2_t arg_f64x2, float64_t arg_f64) {
+	vset_lane_f64(arg_f64, arg_f64x1, 0);
+	vset_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vset_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsetq_lane_f64(arg_f64, arg_f64x2, 0);
+	vsetq_lane_f64(arg_f64, arg_f64x2, 1);
+	vsetq_lane_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsetq_lane_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c b/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
new file mode 100644
index 00000000000000..9da02914966295
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -target-feature +v8.1a -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vqrdmlah_lane_s32, vqrdmlahq_lane_s32, vqrdmlah_laneq_s32, vqrdmlahq_laneq_s32, 
+// vqrdmlsh_lane_s32 are tested under clang/test/CodeGen/arm-neon-range-checks.c
+
+void test_saturating_multiply_accumulate_by_element_s16(int16x8_t arg_i16x8, int16_t arg_i16, int16x4_t arg_i16x4) {
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 0);
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 3);
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahh_lane_s16(arg_i16, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 0);
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 7);
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 0);
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 3);
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshh_lane_s16(arg_i16, arg_i16, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 0);
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 7);
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshh_laneq_s16(arg_i16, arg_i16, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_saturating_multiply_accumulate_by_element_s32(int32x4_t arg_i32x4, int32_t arg_i32, int32x2_t arg_i32x2) {
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 0);
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 1);
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 0);
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 3);
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 0);
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 1);
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshs_lane_s32(arg_i32, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 0);
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 3);
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
new file mode 100644
index 00000000000000..a11535acf0b4a7
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
@@ -0,0 +1,525 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+// The majority of s8, s16, s32 and s64 variants are tested under
+// clang/test/Sema/aarch64-neon-ranges.c
+
+void test_vector_load_s64(int64x1x4_t arg_i64x1x4, int64x2_t arg_i64x2, int64_t *arg_i64_ptr,
+						  int64x2x3_t arg_i64x2x3, int64x1x2_t arg_i64x1x2, int64x2x2_t arg_i64x2x2,
+						  int64x2x4_t arg_i64x2x4, int64x1x3_t arg_i64x1x3, int64x1_t arg_i64x1) {
+	
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u8(uint8x8x2_t arg_u8x8x2, uint8x16x2_t arg_u8x16x2, uint8x8x4_t arg_u8x8x4,
+						uint8x8_t arg_u8x8, uint8x8x3_t arg_u8x8x3, uint8x16_t arg_u8x16,
+						uint8x16x4_t arg_u8x16x4, uint8_t *arg_u8_ptr, uint8x16x3_t arg_u8x16x3) {
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 0);
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 7);
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u8(arg_u8_ptr, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 0);
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 15);
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u8(arg_u8_ptr, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 0);
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 7);
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u8(arg_u8_ptr, arg_u8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 0);
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 15);
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 0);
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 7);
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u8(arg_u8_ptr, arg_u8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 0);
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 15);
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 0);
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 7);
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u8(arg_u8_ptr, arg_u8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 0);
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 15);
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u16(uint16x8x2_t arg_u16x8x2, uint16x8x4_t arg_u16x8x4, uint16x4x4_t arg_u16x4x4,
+						  uint16x4x2_t arg_u16x4x2, uint16x8_t arg_u16x8, uint16_t *arg_u16_ptr,
+						  uint16x8x3_t arg_u16x8x3, uint16x4_t arg_u16x4, uint16x4x3_t arg_u16x4x3) {
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 0);
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 3);
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u16(arg_u16_ptr, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 0);
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 7);
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u16(arg_u16_ptr, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 0);
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 3);
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u16(arg_u16_ptr, arg_u16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 0);
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 7);
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 0);
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 3);
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u16(arg_u16_ptr, arg_u16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 0);
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 7);
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 0);
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 3);
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u16(arg_u16_ptr, arg_u16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 0);
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 7);
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_u32(uint32x2x3_t arg_u32x2x3, uint32x2_t arg_u32x2, uint32x2x4_t arg_u32x2x4,
+						  uint32x4_t arg_u32x4, uint32x4x2_t arg_u32x4x2, uint32x2x2_t arg_u32x2x2,
+						  uint32_t *arg_u32_ptr, uint32x4x4_t arg_u32x4x4, uint32x4x3_t arg_u32x4x3) { // 2-lane forms accept 0..1, 4-lane (q) forms accept 0..3
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 0);
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 1);
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u32(arg_u32_ptr, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 0);
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 3);
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u32(arg_u32_ptr, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 0);
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 1);
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u32(arg_u32_ptr, arg_u32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 0);
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 3);
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 0);
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 1);
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u32(arg_u32_ptr, arg_u32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 0);
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 3);
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 0);
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 1);
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u32(arg_u32_ptr, arg_u32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 0);
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 3);
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for the u64 vld{1,2,3,4}[q]_lane, vldap1[q]_lane and vstl1[q]_lane intrinsics: non-q forms are called with 0 (no diagnostic expected) and with -1 and 1 (error expected); q forms use 0 and 1, then -1 and 2.
+void test_vector_load_u64(uint64x2x2_t arg_u64x2x2, uint64x1x2_t arg_u64x1x2, uint64x2x3_t arg_u64x2x3,
+						  uint64x1_t arg_u64x1, uint64x1x4_t arg_u64x1x4, uint64x1x3_t arg_u64x1x3,
+						  uint64_t *arg_u64_ptr, uint64x2_t arg_u64x2, uint64x2x4_t arg_u64x2x4) {
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// NOTE(review): vstl1[q]_lane_* are presumably store intrinsics; they are checked here with the same (pointer, vector, lane) arguments despite the "load" function name - confirm the grouping is intended.
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, 0);
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_u64(arg_u64_ptr, arg_u64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 0);
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 1);
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, 0);
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_u64(arg_u64_ptr, arg_u64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 0);
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 1);
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, 0);
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_u64(arg_u64_ptr, arg_u64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 0);
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 1);
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for the p64 vld{1,2,3,4}[q]_lane, vldap1[q]_lane and vstl1[q]_lane intrinsics: non-q forms are called with 0 (no diagnostic expected) and with -1 and 1 (error expected); q forms use 0 and 1, then -1 and 2.
+void test_vector_load_p64(poly64_t *arg_p64_ptr, poly64x2x2_t arg_p64x2x2, poly64x1x2_t arg_p64x1x2,
+						  poly64x2x4_t arg_p64x2x4, poly64x1x3_t arg_p64x1x3, poly64x2x3_t arg_p64x2x3,
+						  poly64x1_t arg_p64x1, poly64x2_t arg_p64x2, poly64x1x4_t arg_p64x1x4) {
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// NOTE(review): vstl1[q]_lane_* are presumably store intrinsics; they are checked here with the same (pointer, vector, lane) arguments despite the "load" function name - confirm the grouping is intended.
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, 0);
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p64(arg_p64_ptr, arg_p64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 0);
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 1);
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, 0);
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p64(arg_p64_ptr, arg_p64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 0);
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 1);
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, 0);
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p64(arg_p64_ptr, arg_p64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 0);
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 1);
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for vld{1,2,3,4}[q]_lane_f16: non-q forms are called with 0 and 3 (no diagnostic expected) and with -1 and 4 (error expected); q forms use 0 and 7, then -1 and 8.
+void test_vector_load_f16(float16_t *arg_f16_ptr, float16x8_t arg_f16x8, float16x8x2_t arg_f16x8x2,
+						  float16x8x3_t arg_f16x8x3, float16x4x4_t arg_f16x4x4, float16x8x4_t arg_f16x8x4,
+						  float16x4x2_t arg_f16x4x2, float16x4_t arg_f16x4, float16x4x3_t arg_f16x4x3) {
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 0);
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 3);
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f16(arg_f16_ptr, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 0);
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 7);
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f16(arg_f16_ptr, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 0);
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 3);
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f16(arg_f16_ptr, arg_f16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 0);
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 7);
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 0);
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 3);
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f16(arg_f16_ptr, arg_f16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 0);
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 7);
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 0);
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 3);
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f16(arg_f16_ptr, arg_f16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 0);
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 7);
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for vld{1,2,3,4}[q]_lane_f32: non-q forms are called with 0 and 1 (no diagnostic expected) and with -1 and 2 (error expected); q forms use 0 and 3, then -1 and 4.
+void test_vector_load_f32(float32_t *arg_f32_ptr, float32x4x3_t arg_f32x4x3, float32x2x4_t arg_f32x2x4,
+						  float32x4x4_t arg_f32x4x4, float32x2x3_t arg_f32x2x3, float32x2x2_t arg_f32x2x2,
+						  float32x4x2_t arg_f32x4x2, float32x2_t arg_f32x2, float32x4_t arg_f32x4) {
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 0);
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 1);
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f32(arg_f32_ptr, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 0);
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 3);
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f32(arg_f32_ptr, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 0);
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 1);
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f32(arg_f32_ptr, arg_f32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 0);
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 3);
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 0);
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 1);
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f32(arg_f32_ptr, arg_f32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 0);
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 3);
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 0);
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 1);
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f32(arg_f32_ptr, arg_f32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 0);
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 3);
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for vld{1,2,3,4}[q]_lane_p8: non-q forms are called with 0 and 7 (no diagnostic expected) and with -1 and 8 (error expected); q forms use 0 and 15, then -1 and 16.
+void test_vector_load_p8(poly8x16_t arg_p8x16, poly8x8x2_t arg_p8x8x2, poly8x16x4_t arg_p8x16x4,
+						 poly8_t *arg_p8_ptr, poly8x8_t arg_p8x8, poly8x8x4_t arg_p8x8x4,
+						 poly8x16x2_t arg_p8x16x2, poly8x8x3_t arg_p8x8x3, poly8x16x3_t arg_p8x16x3) {
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 0);
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 7);
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p8(arg_p8_ptr, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 0);
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 15);
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p8(arg_p8_ptr, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 0);
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 7);
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p8(arg_p8_ptr, arg_p8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 0);
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 15);
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 0);
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 7);
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p8(arg_p8_ptr, arg_p8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 0);
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 15);
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 0);
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 7);
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p8(arg_p8_ptr, arg_p8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 0);
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 15);
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for vld{1,2,3,4}[q]_lane_p16: non-q forms are called with 0 and 3 (no diagnostic expected) and with -1 and 4 (error expected); q forms use 0 and 7, then -1 and 8.
+void test_vector_load_p16(poly16x8x4_t arg_p16x8x4, poly16x8_t arg_p16x8, poly16x4x4_t arg_p16x4x4,
+						  poly16x8x3_t arg_p16x8x3, poly16_t *arg_p16_ptr, poly16x4_t arg_p16x4,
+						  poly16x8x2_t arg_p16x8x2, poly16x4x2_t arg_p16x4x2, poly16x4x3_t arg_p16x4x3) {
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 0);
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 3);
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_p16(arg_p16_ptr, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 0);
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 7);
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_p16(arg_p16_ptr, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 0);
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 3);
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_p16(arg_p16_ptr, arg_p16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 0);
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 7);
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 0);
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 3);
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_p16(arg_p16_ptr, arg_p16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 0);
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 7);
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 0);
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 3);
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_p16(arg_p16_ptr, arg_p16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 0);
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 7);
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for the f64 vld{1,2,3,4}[q]_lane, vldap1[q]_lane and vstl1[q]_lane intrinsics: non-q forms are called with 0 (no diagnostic expected) and with -1 and 1 (error expected); q forms use 0 and 1, then -1 and 2.
+void test_vector_load_f64(float64x1_t arg_f64x1, float64x1x2_t arg_f64x1x2, float64_t* arg_f64_ptr,
+						  float64x2x3_t arg_f64x2x3, float64x2x4_t arg_f64x2x4, float64x2x2_t arg_f64x2x2,
+						  float64x2_t arg_f64x2, float64x1x3_t arg_f64x1x3, float64x1x4_t arg_f64x1x4) {
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vldap1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// NOTE(review): vstl1[q]_lane_* are presumably store intrinsics; they are checked here with the same (pointer, vector, lane) arguments despite the "load" function name - confirm the grouping is intended.
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vstl1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, 0);
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_f64(arg_f64_ptr, arg_f64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 0);
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 1);
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, 0);
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_f64(arg_f64_ptr, arg_f64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 0);
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 1);
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, 0);
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_f64(arg_f64_ptr, arg_f64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 0);
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 1);
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
new file mode 100644
index 00000000000000..a306200c62f1c0
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vmla_lane_s32, vmlaq_lane_s32, vmla_laneq_s32, vmlaq_laneq_s32, vmlal_lane_s32,
+// vmlal_high_lane_s32, vmlal_laneq_s32 and vmlal_high_laneq_s32 are tested under
+// clang/test/CodeGen/arm-neon-range-checks.c
+// Lane-index range checks for the s16 vmla/vmlal by-lane intrinsics: *_lane forms (last vector argument int16x4_t) are called with 0 and 3 (no diagnostic expected) and with -1 and 4 (error expected); *_laneq forms (last vector argument int16x8_t) use 0 and 7, then -1 and 8.
+void test_vector_multiply_accumulate_by_scalar_s16(int32x4_t arg_i32x4, int16x8_t arg_i16x8, int16x4_t arg_i16x4) {
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+// Lane-index range checks for the u16 vmla/vmlal by-lane intrinsics: *_lane forms (last vector argument uint16x4_t) are called with 0 and 3 (no diagnostic expected) and with -1 and 4 (error expected); *_laneq forms (last vector argument uint16x8_t) use 0 and 7, then -1 and 8.
+void test_vector_multiply_accumulate_by_scalar_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8, uint32x4_t arg_u32x4) {
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 0);
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 3);
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 0);
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 7);
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 0);
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 7);
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 0);
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 3);
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 0);
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 3);
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 0);
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 7);
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 0);
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 7);
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_u32(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // u32 vmla/vmlaq/vmlal/vmlal_high lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 0);
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 1);
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 0);
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 1);
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 0);
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 3);
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 0);
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 3);
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 0);
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 1);
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 0);
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 1);
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 0);
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 3);
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 0);
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 3);
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_accumulate_by_scalar_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) { // f32 vmla/vmlaq lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
new file mode 100644
index 00000000000000..1363a4ec453347
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
@@ -0,0 +1,78 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vmull_lane_s32, vmull_high_lane_s32, vmull_laneq_s32, vmull_high_laneq_s32
+// are tested under clang/test/CodeGen/arm-neon-range-checks.c
+
+void test_vector_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // s16 vmull/vmull_high lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vmull_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+
+void test_vector_multiply_by_scalar_and_widen_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // u16 vmull/vmull_high lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 0);
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 3);
+	vmull_lane_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 0);
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 3);
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_u16(arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 0);
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 7);
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_u16(arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 0);
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 7);
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_and_widen_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // u32 vmull/vmull_high lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 0);
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 1);
+	vmull_lane_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 0);
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 1);
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_u32(arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 0);
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 3);
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_u32(arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 0);
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 3);
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
new file mode 100644
index 00000000000000..8d762f32b35af5
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
@@ -0,0 +1,158 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// vmul_lane_f64, vmul_laneq_f64
+// are tested under clang/test/Sema/aarch64-neon-ranges.c
+
+void test_vector_multiply_by_scalar_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // s16 vmul/vmulq lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 0);
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 3);
+	vmul_lane_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_s16(arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 0);
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 3);
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_s16(arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 0);
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 7);
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_s16(arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 0);
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 7);
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_s16(arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // s32 vmul/vmulq lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vmul_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // u16 vmul/vmulq lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 0);
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 3);
+	vmul_lane_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_u16(arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 0);
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 3);
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_u16(arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 0);
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 7);
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_u16(arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 0);
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 7);
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_u16(arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // u32 vmul/vmulq lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 0);
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 1);
+	vmul_lane_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_u32(arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 0);
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 1);
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_u32(arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 0);
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 3);
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_u32(arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 0);
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 3);
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_u32(arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_f32(float32_t arg_f32, float32x2_t arg_f32x2, float32x4_t arg_f32x4) { // f32 vmul/vmulq/vmuls lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 0);
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 1);
+	vmul_lane_f32(arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f32(arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 0);
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 1);
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f32(arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuls_lane_f32(arg_f32, arg_f32x2, 0);
+	vmuls_lane_f32(arg_f32, arg_f32x2, 1);
+	vmuls_lane_f32(arg_f32, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuls_lane_f32(arg_f32, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 0);
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 3);
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f32(arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 0);
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 3);
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f32(arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 0);
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 3);
+	vmuls_laneq_f32(arg_f32, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuls_laneq_f32(arg_f32, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_by_scalar_f64(float64x2_t arg_f64x2, float64_t arg_f64, float64x1_t arg_f64x1) { // f64 vmul/vmulq/vmuld forms; lane groups: 0 is the only unannotated immediate (-1 and 1 annotated as errors); laneq groups: 0 and 1 unannotated (-1 and 2 annotated as errors)
+	vmul_lane_f64(arg_f64x1, arg_f64x1, 0);
+	vmul_lane_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_lane_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, 0);
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_lane_f64(arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuld_lane_f64(arg_f64, arg_f64x1, 0);
+	vmuld_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuld_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 0);
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 1);
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 0);
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 1);
+	vmuld_laneq_f64(arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmuld_laneq_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
new file mode 100644
index 00000000000000..81194427b3b792
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// clang/test/CodeGen/arm-neon-range-checks.c includes tests for:
+// vmls_lane_s32, vmlsq_lane_s32, vmls_laneq_s32, vmlsq_laneq_s32
+// vmlsl_lane_s32, vmlsl_high_lane_s32, vmlsl_laneq_s32, vmlsl_high_laneq_s32
+
+void test_vector_multiply_subtract_by_scalar_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4, int32x4_t arg_i32x4) { // s16 vmls/vmlsq/vmlsl/vmlsl_high lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 0);
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 3);
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_s16(arg_i16x8, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 0);
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 7);
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_s16(arg_i16x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 0);
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 7);
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_s16(arg_i16x8, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 0);
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 3);
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_s16(arg_i32x4, arg_i16x4, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 0);
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 3);
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_s16(arg_i32x4, arg_i16x8, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 0);
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 7);
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_s16(arg_i32x4, arg_i16x4, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 0);
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 7);
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_s16(arg_i32x4, arg_i16x8, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4, uint32x4_t arg_u32x4) { // u16 vmls/vmlsq/vmlsl/vmlsl_high lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 0);
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 3);
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_u16(arg_u16x8, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 0);
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 7);
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_u16(arg_u16x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 0);
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 7);
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_u16(arg_u16x8, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 0);
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 3);
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_u16(arg_u32x4, arg_u16x4, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 0);
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 3);
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_u16(arg_u32x4, arg_u16x8, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 0);
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 7);
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_u16(arg_u32x4, arg_u16x4, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 0);
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 7);
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_u16(arg_u32x4, arg_u16x8, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_u32(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // u32 vmls/vmlsq/vmlsl/vmlsl_high lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 0);
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 1);
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_u32(arg_u32x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 0);
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 1);
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_u32(arg_u32x4, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 0);
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 3);
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_u32(arg_u32x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 0);
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 3);
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_u32(arg_u32x4, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 0);
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 1);
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_u32(arg_u64x2, arg_u32x2, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 0);
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 1);
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_u32(arg_u64x2, arg_u32x4, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 0);
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 3);
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_u32(arg_u64x2, arg_u32x2, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 0);
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 3);
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_u32(arg_u64x2, arg_u32x4, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_multiply_subtract_by_scalar_f32(float32x4_t arg_f32x4, float32x2_t arg_f32x2) { // f32 vmls/vmlsq lane and laneq forms; per group: lowest and highest in-range immediate, then one below and one above, both annotated as errors
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 1);
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 0);
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 1);
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_f32(arg_f32x4, arg_f32x4, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 0);
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 3);
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_f32(arg_f32x2, arg_f32x2, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 0);
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 3);
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_f32(arg_f32x4, arg_f32x4, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
new file mode 100644
index 00000000000000..1def72fc843d9b
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-left.c
@@ -0,0 +1,542 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// Widening left-shifts should have a range of 0..(sizeinbits(arg)-1), this range has had
+// to be weakened to 0..((sizeinbits(arg)*2)-1) due to a use of vshll_n_s16 with an
+// out-of-bounds immediate in the definition of vcvt_f32_bf16. As a result, the upper bounds
+// of widening left-shift intrinsics are not currently tested here.
+
+void test_vector_shift_left_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // vshl_n_s8/vshlq_n_s8: shifts 0 and 7 are accepted; -1 and 8 must be diagnosed.
+	vshl_n_s8(arg_i8x8, 0);
+	vshl_n_s8(arg_i8x8, 7);
+	vshl_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s8(arg_i8x16, 0);
+	vshlq_n_s8(arg_i8x16, 7);
+	vshlq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vshl_n_s16/vshlq_n_s16: shifts 0 and 15 are accepted; -1 and 16 must be diagnosed.
+	vshl_n_s16(arg_i16x4, 0);
+	vshl_n_s16(arg_i16x4, 15);
+	vshl_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s16(arg_i16x8, 0);
+	vshlq_n_s16(arg_i16x8, 15);
+	vshlq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vshl_n_s32/vshlq_n_s32: shifts 0 and 31 are accepted; -1 and 32 must be diagnosed.
+	vshl_n_s32(arg_i32x2, 0);
+	vshl_n_s32(arg_i32x2, 31);
+	vshl_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s32(arg_i32x4, 0);
+	vshlq_n_s32(arg_i32x4, 31);
+	vshlq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) { // vshl_n_s64/vshlq_n_s64/vshld_n_s64: shifts 0 and 63 are accepted; -1 and 64 must be diagnosed.
+	vshl_n_s64(arg_i64x1, 0);
+	vshl_n_s64(arg_i64x1, 63);
+	vshl_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_s64(arg_i64x2, 0);
+	vshlq_n_s64(arg_i64x2, 63);
+	vshlq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshld_n_s64(arg_i64, 0);
+	vshld_n_s64(arg_i64, 63);
+	vshld_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshld_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // vshl_n_u8/vshlq_n_u8: shifts 0 and 7 are accepted; -1 and 8 must be diagnosed.
+	vshl_n_u8(arg_u8x8, 0);
+	vshl_n_u8(arg_u8x8, 7);
+	vshl_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u8(arg_u8x16, 0);
+	vshlq_n_u8(arg_u8x16, 7);
+	vshlq_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u8(arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // vshl_n_u16/vshlq_n_u16: shifts 0 and 15 are accepted; -1 and 16 must be diagnosed.
+	vshl_n_u16(arg_u16x4, 0);
+	vshl_n_u16(arg_u16x4, 15);
+	vshl_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u16(arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u16(arg_u16x8, 0);
+	vshlq_n_u16(arg_u16x8, 15);
+	vshlq_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u16(arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vshl_n_u32/vshlq_n_u32: shifts 0 and 31 are accepted; -1 and 32 must be diagnosed.
+	vshl_n_u32(arg_u32x2, 0);
+	vshl_n_u32(arg_u32x2, 31);
+	vshl_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u32(arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u32(arg_u32x4, 0);
+	vshlq_n_u32(arg_u32x4, 31);
+	vshlq_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u32(arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) { // vshl_n_u64/vshlq_n_u64/vshld_n_u64: shifts 0 and 63 are accepted; -1 and 64 must be diagnosed.
+	vshl_n_u64(arg_u64x1, 0);
+	vshl_n_u64(arg_u64x1, 63);
+	vshl_n_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshl_n_u64(arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshlq_n_u64(arg_u64x2, 0);
+	vshlq_n_u64(arg_u64x2, 63);
+	vshlq_n_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshlq_n_u64(arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshld_n_u64(arg_u64, 0);
+	vshld_n_u64(arg_u64, 63);
+	vshld_n_u64(arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshld_n_u64(arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16, int8_t arg_i8) { // vqshl and vqshlu _n_s8 (64-bit vector, q and scalar b forms): shifts 0 and 7 are accepted; -1 and 8 must be diagnosed.
+	vqshl_n_s8(arg_i8x8, 0);
+	vqshl_n_s8(arg_i8x8, 7);
+	vqshl_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s8(arg_i8x16, 0);
+	vqshlq_n_s8(arg_i8x16, 7);
+	vqshlq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlb_n_s8(arg_i8, 0);
+	vqshlb_n_s8(arg_i8, 7);
+	vqshlb_n_s8(arg_i8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlb_n_s8(arg_i8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s8(arg_i8x8, 0);
+	vqshlu_n_s8(arg_i8x8, 7);
+	vqshlu_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s8(arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s8(arg_i8x16, 0);
+	vqshluq_n_s8(arg_i8x16, 7);
+	vqshluq_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s8(arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlub_n_s8(arg_i8, 0);
+	vqshlub_n_s8(arg_i8, 7);
+	vqshlub_n_s8(arg_i8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlub_n_s8(arg_i8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s16(int16x4_t arg_i16x4, int16_t arg_i16, int16x8_t arg_i16x8) { // vqshl and vqshlu _n_s16 (64-bit vector, q and scalar h forms): shifts 0 and 15 are accepted; -1 and 16 must be diagnosed.
+	vqshl_n_s16(arg_i16x4, 0);
+	vqshl_n_s16(arg_i16x4, 15);
+	vqshl_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s16(arg_i16x8, 0);
+	vqshlq_n_s16(arg_i16x8, 15);
+	vqshlq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlh_n_s16(arg_i16, 0);
+	vqshlh_n_s16(arg_i16, 15);
+	vqshlh_n_s16(arg_i16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlh_n_s16(arg_i16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s16(arg_i16x4, 0);
+	vqshlu_n_s16(arg_i16x4, 15);
+	vqshlu_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s16(arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s16(arg_i16x8, 0);
+	vqshluq_n_s16(arg_i16x8, 15);
+	vqshluq_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s16(arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluh_n_s16(arg_i16, 0);
+	vqshluh_n_s16(arg_i16, 15);
+	vqshluh_n_s16(arg_i16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluh_n_s16(arg_i16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s32(int32x2_t arg_i32x2, int32_t arg_i32, int32x4_t arg_i32x4) { // vqshl and vqshlu _n_s32 (64-bit vector, q and scalar s forms): shifts 0 and 31 are accepted; -1 and 32 must be diagnosed.
+	vqshl_n_s32(arg_i32x2, 0);
+	vqshl_n_s32(arg_i32x2, 31);
+	vqshl_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s32(arg_i32x4, 0);
+	vqshlq_n_s32(arg_i32x4, 31);
+	vqshlq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshls_n_s32(arg_i32, 0);
+	vqshls_n_s32(arg_i32, 31);
+	vqshls_n_s32(arg_i32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshls_n_s32(arg_i32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s32(arg_i32x2, 0);
+	vqshlu_n_s32(arg_i32x2, 31);
+	vqshlu_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s32(arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s32(arg_i32x4, 0);
+	vqshluq_n_s32(arg_i32x4, 31);
+	vqshluq_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s32(arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlus_n_s32(arg_i32, 0);
+	vqshlus_n_s32(arg_i32, 31);
+	vqshlus_n_s32(arg_i32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlus_n_s32(arg_i32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) { // vqshl and vqshlu _n_s64 (64-bit vector, q and scalar d forms): shifts 0 and 63 are accepted; -1 and 64 must be diagnosed.
+	vqshl_n_s64(arg_i64x1, 0);
+	vqshl_n_s64(arg_i64x1, 63);
+	vqshl_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_s64(arg_i64x2, 0);
+	vqshlq_n_s64(arg_i64x2, 63);
+	vqshlq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshld_n_s64(arg_i64, 0);
+	vqshld_n_s64(arg_i64, 63);
+	vqshld_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshld_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlu_n_s64(arg_i64x1, 0);
+	vqshlu_n_s64(arg_i64x1, 63);
+	vqshlu_n_s64(arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlu_n_s64(arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshluq_n_s64(arg_i64x2, 0);
+	vqshluq_n_s64(arg_i64x2, 63);
+	vqshluq_n_s64(arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshluq_n_s64(arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlud_n_s64(arg_i64, 0);
+	vqshlud_n_s64(arg_i64, 63);
+	vqshlud_n_s64(arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlud_n_s64(arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u8(uint8x8_t arg_u8x8, uint8_t arg_u8, uint8x16_t arg_u8x16) { // vqshl_n_u8/vqshlq_n_u8/vqshlb_n_u8: shifts 0 and 7 are accepted; -1 and 8 must be diagnosed.
+	vqshl_n_u8(arg_u8x8, 0);
+	vqshl_n_u8(arg_u8x8, 7);
+	vqshl_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u8(arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u8(arg_u8x16, 0);
+	vqshlq_n_u8(arg_u8x16, 7);
+	vqshlq_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u8(arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlb_n_u8(arg_u8, 0);
+	vqshlb_n_u8(arg_u8, 7);
+	vqshlb_n_u8(arg_u8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlb_n_u8(arg_u8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u16(uint16_t arg_u16, uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // vqshl_n_u16/vqshlq_n_u16/vqshlh_n_u16: shifts 0 and 15 are accepted; -1 and 16 must be diagnosed.
+	vqshl_n_u16(arg_u16x4, 0);
+	vqshl_n_u16(arg_u16x4, 15);
+	vqshl_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u16(arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u16(arg_u16x8, 0);
+	vqshlq_n_u16(arg_u16x8, 15);
+	vqshlq_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u16(arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlh_n_u16(arg_u16, 0);
+	vqshlh_n_u16(arg_u16, 15);
+	vqshlh_n_u16(arg_u16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlh_n_u16(arg_u16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4, uint32_t arg_u32) { // vqshl_n_u32/vqshlq_n_u32/vqshls_n_u32: shifts 0 and 31 are accepted; -1 and 32 must be diagnosed.
+	vqshl_n_u32(arg_u32x2, 0);
+	vqshl_n_u32(arg_u32x2, 31);
+	vqshl_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u32(arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u32(arg_u32x4, 0);
+	vqshlq_n_u32(arg_u32x4, 31);
+	vqshlq_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u32(arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshls_n_u32(arg_u32, 0);
+	vqshls_n_u32(arg_u32, 31);
+	vqshls_n_u32(arg_u32, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshls_n_u32(arg_u32, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_left_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) { // vqshl_n_u64/vqshlq_n_u64/vqshld_n_u64: shifts 0 and 63 are accepted; -1 and 64 must be diagnosed.
+	vqshl_n_u64(arg_u64x1, 0);
+	vqshl_n_u64(arg_u64x1, 63);
+	vqshl_n_u64(arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshl_n_u64(arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshlq_n_u64(arg_u64x2, 0);
+	vqshlq_n_u64(arg_u64x2, 63);
+	vqshlq_n_u64(arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshlq_n_u64(arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshld_n_u64(arg_u64, 0);
+	vqshld_n_u64(arg_u64, 63);
+	vqshld_n_u64(arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshld_n_u64(arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_widen_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // vshll_n_s8/vshll_high_n_s8: shifts 0 and 7 are accepted and -1 must be diagnosed; no above-range value is exercised here.
+	vshll_n_s8(arg_i8x8, 0);
+	vshll_n_s8(arg_i8x8, 7);
+	vshll_n_s8(arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+
+	vshll_high_n_s8(arg_i8x16, 0);
+	vshll_high_n_s8(arg_i8x16, 7);
+	vshll_high_n_s8(arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vshll_n_s16/vshll_high_n_s16: shifts 0 and 15 are accepted and -1 must be diagnosed; no above-range value is exercised here.
+	vshll_n_s16(arg_i16x4, 0);
+	vshll_n_s16(arg_i16x4, 15);
+	vshll_n_s16(arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_s16(arg_i16x8, 0);
+	vshll_high_n_s16(arg_i16x8, 15);
+	vshll_high_n_s16(arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vshll_n_s32/vshll_high_n_s32: shifts 0 and 31 are accepted and -1 must be diagnosed; no above-range value is exercised here.
+	vshll_n_s32(arg_i32x2, 0);
+	vshll_n_s32(arg_i32x2, 31);
+	vshll_n_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_s32(arg_i32x4, 0);
+	vshll_high_n_s32(arg_i32x4, 31);
+	vshll_high_n_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // vshll_n_u8/vshll_high_n_u8: shifts 0 and 7 are accepted and -1 must be diagnosed; no above-range value is exercised here.
+	vshll_n_u8(arg_u8x8, 0);
+	vshll_n_u8(arg_u8x8, 7);
+	vshll_n_u8(arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u8(arg_u8x16, 0);
+	vshll_high_n_u8(arg_u8x16, 7);
+	vshll_high_n_u8(arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // vshll_n_u16/vshll_high_n_u16: shifts 0 and 15 are accepted and -1 must be diagnosed; no above-range value is exercised here.
+	vshll_n_u16(arg_u16x4, 0);
+	vshll_n_u16(arg_u16x4, 15);
+	vshll_n_u16(arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u16(arg_u16x8, 0);
+	vshll_high_n_u16(arg_u16x8, 15);
+	vshll_high_n_u16(arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_widen_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vshll_n_u32/vshll_high_n_u32: shifts 0 and 31 are accepted and -1 must be diagnosed; no above-range value is exercised here.
+	vshll_n_u32(arg_u32x2, 0);
+	vshll_n_u32(arg_u32x2, 31);
+	vshll_n_u32(arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshll_high_n_u32(arg_u32x4, 0);
+	vshll_high_n_u32(arg_u32x4, 31);
+	vshll_high_n_u32(arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vector_shift_left_and_insert_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // vsli_n_s8/vsliq_n_s8: shifts 0 and 7 are accepted; -1 and 8 must be diagnosed.
+	vsli_n_s8(arg_i8x8, arg_i8x8, 0);
+	vsli_n_s8(arg_i8x8, arg_i8x8, 7);
+	vsli_n_s8(arg_i8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s8(arg_i8x8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 0);
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 7);
+	vsliq_n_s8(arg_i8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s8(arg_i8x16, arg_i8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // vsli_n_s16/vsliq_n_s16: shifts 0 and 15 are accepted; -1 and 16 must be diagnosed.
+	vsli_n_s16(arg_i16x4, arg_i16x4, 0);
+	vsli_n_s16(arg_i16x4, arg_i16x4, 15);
+	vsli_n_s16(arg_i16x4, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s16(arg_i16x4, arg_i16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 0);
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 15);
+	vsliq_n_s16(arg_i16x8, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s16(arg_i16x8, arg_i16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // vsli_n_s32/vsliq_n_s32: shifts 0 and 31 are accepted; -1 and 32 must be diagnosed.
+	vsli_n_s32(arg_i32x2, arg_i32x2, 0);
+	vsli_n_s32(arg_i32x2, arg_i32x2, 31);
+	vsli_n_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s32(arg_i32x2, arg_i32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 0);
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 31);
+	vsliq_n_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s32(arg_i32x4, arg_i32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_s64(int64_t arg_i64, int64x2_t arg_i64x2, int64x1_t arg_i64x1) { // vsli_n_s64/vsliq_n_s64/vslid_n_s64: shifts 0 and 63 are accepted; -1 and 64 must be diagnosed.
+	vsli_n_s64(arg_i64x1, arg_i64x1, 0);
+	vsli_n_s64(arg_i64x1, arg_i64x1, 63);
+	vsli_n_s64(arg_i64x1, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_s64(arg_i64x1, arg_i64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 0);
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 63);
+	vsliq_n_s64(arg_i64x2, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_s64(arg_i64x2, arg_i64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vslid_n_s64(arg_i64, arg_i64, 0);
+	vslid_n_s64(arg_i64, arg_i64, 63);
+	vslid_n_s64(arg_i64, arg_i64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vslid_n_s64(arg_i64, arg_i64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) { // vsli_n_u8/vsliq_n_u8: shifts 0 and 7 are accepted; -1 and 8 must be diagnosed.
+	vsli_n_u8(arg_u8x8, arg_u8x8, 0);
+	vsli_n_u8(arg_u8x8, arg_u8x8, 7);
+	vsli_n_u8(arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u8(arg_u8x8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 0);
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 7);
+	vsliq_n_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u8(arg_u8x16, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) { // vsli_n_u16/vsliq_n_u16: shifts 0 and 15 are accepted; -1 and 16 must be diagnosed.
+	vsli_n_u16(arg_u16x4, arg_u16x4, 0);
+	vsli_n_u16(arg_u16x4, arg_u16x4, 15);
+	vsli_n_u16(arg_u16x4, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u16(arg_u16x4, arg_u16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 0);
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 15);
+	vsliq_n_u16(arg_u16x8, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u16(arg_u16x8, arg_u16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // vsli_n_u32/vsliq_n_u32: shifts 0 and 31 are accepted; -1 and 32 must be diagnosed.
+	vsli_n_u32(arg_u32x2, arg_u32x2, 0);
+	vsli_n_u32(arg_u32x2, arg_u32x2, 31);
+	vsli_n_u32(arg_u32x2, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u32(arg_u32x2, arg_u32x2, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 0);
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 31);
+	vsliq_n_u32(arg_u32x4, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u32(arg_u32x4, arg_u32x4, 32); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_u64(uint64x1_t arg_u64x1, uint64_t arg_u64, uint64x2_t arg_u64x2) { // vsli_n_u64/vsliq_n_u64/vslid_n_u64: shifts 0 and 63 are accepted; -1 and 64 must be diagnosed.
+	vsli_n_u64(arg_u64x1, arg_u64x1, 0);
+	vsli_n_u64(arg_u64x1, arg_u64x1, 63);
+	vsli_n_u64(arg_u64x1, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_u64(arg_u64x1, arg_u64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 0);
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 63);
+	vsliq_n_u64(arg_u64x2, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_u64(arg_u64x2, arg_u64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vslid_n_u64(arg_u64, arg_u64, 0);
+	vslid_n_u64(arg_u64, arg_u64, 63);
+	vslid_n_u64(arg_u64, arg_u64, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vslid_n_u64(arg_u64, arg_u64, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) { // vsli_n_p64/vsliq_n_p64: shifts 0 and 63 are accepted; -1 and 64 must be diagnosed.
+	vsli_n_p64(arg_p64x1, arg_p64x1, 0);
+	vsli_n_p64(arg_p64x1, arg_p64x1, 63);
+	vsli_n_p64(arg_p64x1, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p64(arg_p64x1, arg_p64x1, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 0);
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 63);
+	vsliq_n_p64(arg_p64x2, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p64(arg_p64x2, arg_p64x2, 64); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) { // vsli_n_p8/vsliq_n_p8: shifts 0 and 7 are accepted; -1 and 8 must be diagnosed.
+	vsli_n_p8(arg_p8x8, arg_p8x8, 0);
+	vsli_n_p8(arg_p8x8, arg_p8x8, 7);
+	vsli_n_p8(arg_p8x8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p8(arg_p8x8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 0);
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 7);
+	vsliq_n_p8(arg_p8x16, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p8(arg_p8x16, arg_p8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_left_and_insert_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) { // vsli_n_p16/vsliq_n_p16: shifts 0 and 15 are accepted; -1 and 16 must be diagnosed.
+	vsli_n_p16(arg_p16x4, arg_p16x4, 0);
+	vsli_n_p16(arg_p16x4, arg_p16x4, 15);
+	vsli_n_p16(arg_p16x4, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsli_n_p16(arg_p16x4, arg_p16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 0);
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 15);
+	vsliq_n_p16(arg_p16x8, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsliq_n_p16(arg_p16x8, arg_p16x8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
new file mode 100644
index 00000000000000..ad4677fe436660
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-shift-right.c
@@ -0,0 +1,1083 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+
+void test_vector_shift_right_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // Immediate-range checks for vshr_n_s8/vshrq_n_s8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vshr_n_s8(arg_i8x8, 1);
+	vshr_n_s8(arg_i8x8, 8);
+	vshr_n_s8(arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s8(arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s8(arg_i8x16, 1);
+	vshrq_n_s8(arg_i8x16, 8);
+	vshrq_n_s8(arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s8(arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Immediate-range checks for vshr_n_s16/vshrq_n_s16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vshr_n_s16(arg_i16x4, 1);
+	vshr_n_s16(arg_i16x4, 16);
+	vshr_n_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s16(arg_i16x8, 1);
+	vshrq_n_s16(arg_i16x8, 16);
+	vshrq_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Immediate-range checks for vshr_n_s32/vshrq_n_s32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vshr_n_s32(arg_i32x2, 1);
+	vshr_n_s32(arg_i32x2, 32);
+	vshr_n_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s32(arg_i32x4, 1);
+	vshrq_n_s32(arg_i32x4, 32);
+	vshrq_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Immediate-range checks for vshr_n_s64/vshrq_n_s64/vshrd_n_s64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vshr_n_s64(arg_i64x1, 1);
+	vshr_n_s64(arg_i64x1, 64);
+	vshr_n_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_s64(arg_i64x2, 1);
+	vshrq_n_s64(arg_i64x2, 64);
+	vshrq_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrd_n_s64(arg_i64, 1);
+	vshrd_n_s64(arg_i64, 64);
+	vshrd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrd_n_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // Immediate-range checks for vshr_n_u8/vshrq_n_u8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vshr_n_u8(arg_u8x8, 1);
+	vshr_n_u8(arg_u8x8, 8);
+	vshr_n_u8(arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u8(arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u8(arg_u8x16, 1);
+	vshrq_n_u8(arg_u8x16, 8);
+	vshrq_n_u8(arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u8(arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Immediate-range checks for vshr_n_u16/vshrq_n_u16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vshr_n_u16(arg_u16x4, 1);
+	vshr_n_u16(arg_u16x4, 16);
+	vshr_n_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u16(arg_u16x8, 1);
+	vshrq_n_u16(arg_u16x8, 16);
+	vshrq_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u16(arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Immediate-range checks for vshr_n_u32/vshrq_n_u32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vshr_n_u32(arg_u32x2, 1);
+	vshr_n_u32(arg_u32x2, 32);
+	vshr_n_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u32(arg_u32x4, 1);
+	vshrq_n_u32(arg_u32x4, 32);
+	vshrq_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Immediate-range checks for vshr_n_u64/vshrq_n_u64/vshrd_n_u64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vshr_n_u64(arg_u64x1, 1);
+	vshr_n_u64(arg_u64x1, 64);
+	vshr_n_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshr_n_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrq_n_u64(arg_u64x2, 1);
+	vshrq_n_u64(arg_u64x2, 64);
+	vshrq_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrq_n_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrd_n_u64(arg_u64, 1);
+	vshrd_n_u64(arg_u64, 64);
+	vshrd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrd_n_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // Immediate-range checks for vrshr_n_s8/vrshrq_n_s8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vrshr_n_s8(arg_i8x8, 1);
+	vrshr_n_s8(arg_i8x8, 8);
+	vrshr_n_s8(arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s8(arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s8(arg_i8x16, 1);
+	vrshrq_n_s8(arg_i8x16, 8);
+	vrshrq_n_s8(arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s8(arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Immediate-range checks for vrshr_n_s16/vrshrq_n_s16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vrshr_n_s16(arg_i16x4, 1);
+	vrshr_n_s16(arg_i16x4, 16);
+	vrshr_n_s16(arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s16(arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s16(arg_i16x8, 1);
+	vrshrq_n_s16(arg_i16x8, 16);
+	vrshrq_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s16(arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Immediate-range checks for vrshr_n_s32/vrshrq_n_s32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vrshr_n_s32(arg_i32x2, 1);
+	vrshr_n_s32(arg_i32x2, 32);
+	vrshr_n_s32(arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s32(arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s32(arg_i32x4, 1);
+	vrshrq_n_s32(arg_i32x4, 32);
+	vrshrq_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s32(arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Immediate-range checks for vrshr_n_s64/vrshrq_n_s64/vrshrd_n_s64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vrshr_n_s64(arg_i64x1, 1);
+	vrshr_n_s64(arg_i64x1, 64);
+	vrshr_n_s64(arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_s64(arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_s64(arg_i64x2, 1);
+	vrshrq_n_s64(arg_i64x2, 64);
+	vrshrq_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_s64(arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrd_n_s64(arg_i64, 1);
+	vrshrd_n_s64(arg_i64, 64);
+	vrshrd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrd_n_s64(arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // Immediate-range checks for vrshr_n_u8/vrshrq_n_u8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vrshr_n_u8(arg_u8x8, 1);
+	vrshr_n_u8(arg_u8x8, 8);
+	vrshr_n_u8(arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u8(arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u8(arg_u8x16, 1);
+	vrshrq_n_u8(arg_u8x16, 8);
+	vrshrq_n_u8(arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u8(arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Immediate-range checks for vrshr_n_u16/vrshrq_n_u16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vrshr_n_u16(arg_u16x4, 1);
+	vrshr_n_u16(arg_u16x4, 16);
+	vrshr_n_u16(arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u16(arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u16(arg_u16x8, 1);
+	vrshrq_n_u16(arg_u16x8, 16);
+	vrshrq_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u16(arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Immediate-range checks for vrshr_n_u32/vrshrq_n_u32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vrshr_n_u32(arg_u32x2, 1);
+	vrshr_n_u32(arg_u32x2, 32);
+	vrshr_n_u32(arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u32(arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u32(arg_u32x4, 1);
+	vrshrq_n_u32(arg_u32x4, 32);
+	vrshrq_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u32(arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Immediate-range checks for vrshr_n_u64/vrshrq_n_u64/vrshrd_n_u64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vrshr_n_u64(arg_u64x1, 1);
+	vrshr_n_u64(arg_u64x1, 64);
+	vrshr_n_u64(arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshr_n_u64(arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrq_n_u64(arg_u64x2, 1);
+	vrshrq_n_u64(arg_u64x2, 64);
+	vrshrq_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrq_n_u64(arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrd_n_u64(arg_u64, 1);
+	vrshrd_n_u64(arg_u64, 64);
+	vrshrd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrd_n_u64(arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // Immediate-range checks for vsra_n_s8/vsraq_n_s8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vsra_n_s8(arg_i8x8, arg_i8x8, 1);
+	vsra_n_s8(arg_i8x8, arg_i8x8, 8);
+	vsra_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Immediate-range checks for vsra_n_s16/vsraq_n_s16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vsra_n_s16(arg_i16x4, arg_i16x4, 1);
+	vsra_n_s16(arg_i16x4, arg_i16x4, 16);
+	vsra_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Immediate-range checks for vsra_n_s32/vsraq_n_s32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vsra_n_s32(arg_i32x2, arg_i32x2, 1);
+	vsra_n_s32(arg_i32x2, arg_i32x2, 32);
+	vsra_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Immediate-range checks for vsra_n_s64/vsraq_n_s64/vsrad_n_s64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vsra_n_s64(arg_i64x1, arg_i64x1, 1);
+	vsra_n_s64(arg_i64x1, arg_i64x1, 64);
+	vsra_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrad_n_s64(arg_i64, arg_i64, 1);
+	vsrad_n_s64(arg_i64, arg_i64, 64);
+	vsrad_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrad_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // Immediate-range checks for vsra_n_u8/vsraq_n_u8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vsra_n_u8(arg_u8x8, arg_u8x8, 1);
+	vsra_n_u8(arg_u8x8, arg_u8x8, 8);
+	vsra_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Immediate-range checks for vsra_n_u16/vsraq_n_u16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vsra_n_u16(arg_u16x4, arg_u16x4, 1);
+	vsra_n_u16(arg_u16x4, arg_u16x4, 16);
+	vsra_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Immediate-range checks for vsra_n_u32/vsraq_n_u32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vsra_n_u32(arg_u32x2, arg_u32x2, 1);
+	vsra_n_u32(arg_u32x2, arg_u32x2, 32);
+	vsra_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_accumulate_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Immediate-range checks for vsra_n_u64/vsraq_n_u64/vsrad_n_u64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vsra_n_u64(arg_u64x1, arg_u64x1, 1);
+	vsra_n_u64(arg_u64x1, arg_u64x1, 64);
+	vsra_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsra_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsraq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrad_n_u64(arg_u64, arg_u64, 1);
+	vsrad_n_u64(arg_u64, arg_u64, 64);
+	vsrad_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrad_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // Immediate-range checks for vrsra_n_s8/vrsraq_n_s8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 1);
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 8);
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Immediate-range checks for vrsra_n_s16/vrsraq_n_s16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 1);
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 16);
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Immediate-range checks for vrsra_n_s32/vrsraq_n_s32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 1);
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 32);
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Immediate-range checks for vrsra_n_s64/vrsraq_n_s64/vrsrad_n_s64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 1);
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 64);
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsrad_n_s64(arg_i64, arg_i64, 1);
+	vrsrad_n_s64(arg_i64, arg_i64, 64);
+	vrsrad_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsrad_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // Immediate-range checks for vrsra_n_u8/vrsraq_n_u8: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 1);
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 8);
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Immediate-range checks for vrsra_n_u16/vrsraq_n_u16: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 1);
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 16);
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Immediate-range checks for vrsra_n_u32/vrsraq_n_u32: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 1);
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 32);
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_accumulate_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Immediate-range checks for vrsra_n_u64/vrsraq_n_u64/vrsrad_n_u64: shift amounts 1 and 64 are left unannotated (accepted); 0 and 65 are annotated as out-of-range errors.
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 1);
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 64);
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsra_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsraq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrsrad_n_u64(arg_u64, arg_u64, 1);
+	vrsrad_n_u64(arg_u64, arg_u64, 64);
+	vrsrad_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrsrad_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s16(int16x8_t arg_i16x8, int8x8_t arg_i8x8) { // Immediate-range checks for vshrn_n_s16/vshrn_high_n_s16: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vshrn_n_s16(arg_i16x8, 1);
+	vshrn_n_s16(arg_i16x8, 8);
+	vshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s32(int32x4_t arg_i32x4, int16x4_t arg_i16x4) { // Immediate-range checks for vshrn_n_s32/vshrn_high_n_s32: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vshrn_n_s32(arg_i32x4, 1);
+	vshrn_n_s32(arg_i32x4, 16);
+	vshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_s64(int32x2_t arg_i32x2, int64x2_t arg_i64x2) { // Immediate-range checks for vshrn_n_s64/vshrn_high_n_s64: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vshrn_n_s64(arg_i64x2, 1);
+	vshrn_n_s64(arg_i64x2, 32);
+	vshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint8x8_t arg_u8x8) { // Immediate-range checks for vshrn_n_u16/vshrn_high_n_u16: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vshrn_n_u16(arg_u16x8, 1);
+	vshrn_n_u16(arg_u16x8, 8);
+	vshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint16x4_t arg_u16x4) { // Immediate-range checks for vshrn_n_u32/vshrn_high_n_u32: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vshrn_n_u32(arg_u32x4, 1);
+	vshrn_n_u32(arg_u32x4, 16);
+	vshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2) { // Immediate-range checks for vshrn_n_u64/vshrn_high_n_u64: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vshrn_n_u64(arg_u64x2, 1);
+	vshrn_n_u64(arg_u64x2, 32);
+	vshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s16(int16x8_t arg_i16x8, uint8x8_t arg_u8x8, int16_t arg_i16, int8x8_t arg_i8x8) { // Immediate-range checks for the vqshrun*/vqshrn* s16 intrinsics called below: shift amounts 1 and 8 are left unannotated (accepted); 0 and 9 are annotated as out-of-range errors.
+	vqshrun_n_s16(arg_i16x8, 1);
+	vqshrun_n_s16(arg_i16x8, 8);
+	vqshrun_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrunh_n_s16(arg_i16, 1);
+	vqshrunh_n_s16(arg_i16, 8);
+	vqshrunh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrunh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 1);
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 8);
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s16(arg_u8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s16(arg_i16x8, 1);
+	vqshrn_n_s16(arg_i16x8, 8);
+	vqshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnh_n_s16(arg_i16, 1);
+	vqshrnh_n_s16(arg_i16, 8);
+	vqshrnh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s32(int16x4_t arg_i16x4, int32_t arg_i32, int32x4_t arg_i32x4, uint16x4_t arg_u16x4) { // Immediate-range checks for the vqshrun*/vqshrn* s32 intrinsics called below: shift amounts 1 and 16 are left unannotated (accepted); 0 and 17 are annotated as out-of-range errors.
+	vqshrun_n_s32(arg_i32x4, 1);
+	vqshrun_n_s32(arg_i32x4, 16);
+	vqshrun_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshruns_n_s32(arg_i32, 1);
+	vqshruns_n_s32(arg_i32, 16);
+	vqshruns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshruns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 1);
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 16);
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s32(arg_u16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s32(arg_i32x4, 1);
+	vqshrn_n_s32(arg_i32x4, 16);
+	vqshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrns_n_s32(arg_i32, 1);
+	vqshrns_n_s32(arg_i32, 16);
+	vqshrns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_s64(uint32x2_t arg_u32x2, int64x2_t arg_i64x2, int32x2_t arg_i32x2, int64_t arg_i64) { // Immediate-range checks for the vqshrun*/vqshrn* s64 intrinsics called below: shift amounts 1 and 32 are left unannotated (accepted); 0 and 33 are annotated as out-of-range errors.
+	vqshrun_n_s64(arg_i64x2, 1);
+	vqshrun_n_s64(arg_i64x2, 32);
+	vqshrun_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrund_n_s64(arg_i64, 1);
+	vqshrund_n_s64(arg_i64, 32);
+	vqshrund_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrund_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 1);
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 32);
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrun_high_n_s64(arg_u32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_n_s64(arg_i64x2, 1);
+	vqshrn_n_s64(arg_i64x2, 32);
+	vqshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnd_n_s64(arg_i64, 1);
+	vqshrnd_n_s64(arg_i64, 32);
+	vqshrnd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnd_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint16_t arg_u16, uint8x8_t arg_u8x8) { // Shift-immediate bounds for the u16 saturating narrowing shifts: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vqshrn_n_u16(arg_u16x8, 1);
+	vqshrn_n_u16(arg_u16x8, 8);
+	vqshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnh_n_u16(arg_u16, 1);
+	vqshrnh_n_u16(arg_u16, 8);
+	vqshrnh_n_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnh_n_u16(arg_u16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint32_t arg_u32, uint16x4_t arg_u16x4) { // Shift-immediate bounds for the u32 saturating narrowing shifts: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vqshrn_n_u32(arg_u32x4, 1);
+	vqshrn_n_u32(arg_u32x4, 16);
+	vqshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrns_n_u32(arg_u32, 1);
+	vqshrns_n_u32(arg_u32, 16);
+	vqshrns_n_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrns_n_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2, uint64_t arg_u64) { // Shift-immediate bounds for the u64 saturating narrowing shifts: 1 and 32 are valid; 0 and 33 must be diagnosed.
+	vqshrn_n_u64(arg_u64x2, 1);
+	vqshrn_n_u64(arg_u64x2, 32);
+	vqshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrnd_n_u64(arg_u64, 1);
+	vqshrnd_n_u64(arg_u64, 32);
+	vqshrnd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrnd_n_u64(arg_u64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s16(int16x8_t arg_i16x8, uint8x8_t arg_u8x8,
+																int16_t arg_i16, int8x8_t arg_i8x8) { // Shift-immediate bounds for the s16 saturating rounding narrowing shifts: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vqrshrun_n_s16(arg_i16x8, 1);
+	vqrshrun_n_s16(arg_i16x8, 8);
+	vqrshrun_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrunh_n_s16(arg_i16, 1);
+	vqrshrunh_n_s16(arg_i16, 8);
+	vqrshrunh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrunh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 1);
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 8);
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s16(arg_u8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_n_s16(arg_i16x8, 1);
+	vqrshrn_n_s16(arg_i16x8, 8);
+	vqrshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnh_n_s16(arg_i16, 1);
+	vqrshrnh_n_s16(arg_i16, 8);
+	vqrshrnh_n_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnh_n_s16(arg_i16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s32(int16x4_t arg_i16x4, int32_t arg_i32,
+																 int32x4_t arg_i32x4, uint16x4_t arg_u16x4) { // Shift-immediate bounds for the s32 saturating rounding narrowing shifts: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vqrshrun_n_s32(arg_i32x4, 1);
+	vqrshrun_n_s32(arg_i32x4, 16);
+	vqrshrun_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshruns_n_s32(arg_i32, 1);
+	vqrshruns_n_s32(arg_i32, 16);
+	vqrshruns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshruns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 1);
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 16);
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s32(arg_u16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_n_s32(arg_i32x4, 1);
+	vqrshrn_n_s32(arg_i32x4, 16);
+	vqrshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrns_n_s32(arg_i32, 1);
+	vqrshrns_n_s32(arg_i32, 16);
+	vqrshrns_n_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrns_n_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_s64(uint32x2_t arg_u32x2, int64x2_t arg_i64x2,
+																int32x2_t arg_i32x2, int64_t arg_i64) { // Shift-immediate bounds for the s64 saturating rounding narrowing shifts: 1 and 32 are valid; 0 and 33 must be diagnosed.
+	vqrshrun_n_s64(arg_i64x2, 1);
+	vqrshrun_n_s64(arg_i64x2, 32);
+	vqrshrun_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrund_n_s64(arg_i64, 1);
+	vqrshrund_n_s64(arg_i64, 32);
+	vqrshrund_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrund_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 1);
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 32);
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrun_high_n_s64(arg_u32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_n_s64(arg_i64x2, 1);
+	vqrshrn_n_s64(arg_i64x2, 32);
+	vqrshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnd_n_s64(arg_i64, 1);
+	vqrshrnd_n_s64(arg_i64, 32);
+	vqrshrnd_n_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnd_n_s64(arg_i64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint16_t arg_u16,
+																uint8x8_t arg_u8x8) { // Shift-immediate bounds for the u16 saturating rounding narrowing shifts: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vqrshrn_n_u16(arg_u16x8, 1);
+	vqrshrn_n_u16(arg_u16x8, 8);
+	vqrshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnh_n_u16(arg_u16, 1);
+	vqrshrnh_n_u16(arg_u16, 8);
+	vqrshrnh_n_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnh_n_u16(arg_u16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint32_t arg_u32,
+																uint16x4_t arg_u16x4) { // Shift-immediate bounds for the u32 saturating rounding narrowing shifts: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vqrshrn_n_u32(arg_u32x4, 1);
+	vqrshrn_n_u32(arg_u32x4, 16);
+	vqrshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrns_n_u32(arg_u32, 1);
+	vqrshrns_n_u32(arg_u32, 16);
+	vqrshrns_n_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrns_n_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_saturating_rounding_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2,
+																uint64_t arg_u64) { // Shift-immediate bounds for the u64 saturating rounding narrowing shifts: 1 and 32 are valid; 0 and 33 must be diagnosed.
+	vqrshrn_n_u64(arg_u64x2, 1);
+	vqrshrn_n_u64(arg_u64x2, 32);
+	vqrshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrnd_n_u64(arg_u64, 1);
+	vqrshrnd_n_u64(arg_u64, 32);
+	vqrshrnd_n_u64(arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrnd_n_u64(arg_u64, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s16(int16x8_t arg_i16x8, int8x8_t arg_i8x8) { // Shift-immediate bounds for the s16 rounding narrowing shifts: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vrshrn_n_s16(arg_i16x8, 1);
+	vrshrn_n_s16(arg_i16x8, 8);
+	vrshrn_n_s16(arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s16(arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 1);
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 8);
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s16(arg_i8x8, arg_i16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s32(int32x4_t arg_i32x4, int16x4_t arg_i16x4) { // Shift-immediate bounds for the s32 rounding narrowing shifts: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vrshrn_n_s32(arg_i32x4, 1);
+	vrshrn_n_s32(arg_i32x4, 16);
+	vrshrn_n_s32(arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s32(arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 1);
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 16);
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s32(arg_i16x4, arg_i32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_s64(int32x2_t arg_i32x2, int64x2_t arg_i64x2) { // Shift-immediate bounds for the s64 rounding narrowing shifts: 1 and 32 are valid; 0 and 33 must be diagnosed.
+	vrshrn_n_s64(arg_i64x2, 1);
+	vrshrn_n_s64(arg_i64x2, 32);
+	vrshrn_n_s64(arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_s64(arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 1);
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 32);
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_s64(arg_i32x2, arg_i64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u16(uint16x8_t arg_u16x8, uint8x8_t arg_u8x8) { // Shift-immediate bounds for the u16 rounding narrowing shifts: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vrshrn_n_u16(arg_u16x8, 1);
+	vrshrn_n_u16(arg_u16x8, 8);
+	vrshrn_n_u16(arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u16(arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 1);
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 8);
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u16(arg_u8x8, arg_u16x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u32(uint32x4_t arg_u32x4, uint16x4_t arg_u16x4) { // Shift-immediate bounds for the u32 rounding narrowing shifts: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vrshrn_n_u32(arg_u32x4, 1);
+	vrshrn_n_u32(arg_u32x4, 16);
+	vrshrn_n_u32(arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u32(arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 1);
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 16);
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u32(arg_u16x4, arg_u32x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_rounding_shift_right_and_narrow_u64(uint64x2_t arg_u64x2, uint32x2_t arg_u32x2) { // Shift-immediate bounds for the u64 rounding narrowing shifts: 1 and 32 are valid; 0 and 33 must be diagnosed.
+	vrshrn_n_u64(arg_u64x2, 1);
+	vrshrn_n_u64(arg_u64x2, 32);
+	vrshrn_n_u64(arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_n_u64(arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 1);
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 32);
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vrshrn_high_n_u64(arg_u32x2, arg_u64x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) { // Shift-immediate bounds for vsri/vsriq on s8: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vsri_n_s8(arg_i8x8, arg_i8x8, 1);
+	vsri_n_s8(arg_i8x8, arg_i8x8, 8);
+	vsri_n_s8(arg_i8x8, arg_i8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s8(arg_i8x8, arg_i8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 1);
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 8);
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s8(arg_i8x16, arg_i8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) { // Shift-immediate bounds for vsri/vsriq on s16: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vsri_n_s16(arg_i16x4, arg_i16x4, 1);
+	vsri_n_s16(arg_i16x4, arg_i16x4, 16);
+	vsri_n_s16(arg_i16x4, arg_i16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s16(arg_i16x4, arg_i16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 1);
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 16);
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s16(arg_i16x8, arg_i16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4) { // Shift-immediate bounds for vsri/vsriq on s32: 1 and 32 are valid; 0 and 33 must be diagnosed.
+	vsri_n_s32(arg_i32x2, arg_i32x2, 1);
+	vsri_n_s32(arg_i32x2, arg_i32x2, 32);
+	vsri_n_s32(arg_i32x2, arg_i32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s32(arg_i32x2, arg_i32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 1);
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 32);
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s32(arg_i32x4, arg_i32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_s64(int64_t arg_i64, int64x1_t arg_i64x1, int64x2_t arg_i64x2) { // Shift-immediate bounds for vsri/vsriq/vsrid on s64: 1 and 64 are valid; 0 and 65 must be diagnosed.
+	vsri_n_s64(arg_i64x1, arg_i64x1, 1);
+	vsri_n_s64(arg_i64x1, arg_i64x1, 64);
+	vsri_n_s64(arg_i64x1, arg_i64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_s64(arg_i64x1, arg_i64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 1);
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 64);
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_s64(arg_i64x2, arg_i64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrid_n_s64(arg_i64, arg_i64, 1);
+	vsrid_n_s64(arg_i64, arg_i64, 64);
+	vsrid_n_s64(arg_i64, arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrid_n_s64(arg_i64, arg_i64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) { // Shift-immediate bounds for vsri/vsriq on u8: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vsri_n_u8(arg_u8x8, arg_u8x8, 1);
+	vsri_n_u8(arg_u8x8, arg_u8x8, 8);
+	vsri_n_u8(arg_u8x8, arg_u8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u8(arg_u8x8, arg_u8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 1);
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 8);
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u8(arg_u8x16, arg_u8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) { // Shift-immediate bounds for vsri/vsriq on u16: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vsri_n_u16(arg_u16x4, arg_u16x4, 1);
+	vsri_n_u16(arg_u16x4, arg_u16x4, 16);
+	vsri_n_u16(arg_u16x4, arg_u16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u16(arg_u16x4, arg_u16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 1);
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 16);
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u16(arg_u16x8, arg_u16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u32(uint32x2_t arg_u32x2, uint32x4_t arg_u32x4) { // Shift-immediate bounds for vsri/vsriq on u32: 1 and 32 are valid; 0 and 33 must be diagnosed.
+	vsri_n_u32(arg_u32x2, arg_u32x2, 1);
+	vsri_n_u32(arg_u32x2, arg_u32x2, 32);
+	vsri_n_u32(arg_u32x2, arg_u32x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u32(arg_u32x2, arg_u32x2, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 1);
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 32);
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u32(arg_u32x4, arg_u32x4, 33); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_u64(uint64x2_t arg_u64x2, uint64_t arg_u64, uint64x1_t arg_u64x1) { // Shift-immediate bounds for vsri/vsriq/vsrid on u64: 1 and 64 are valid; 0 and 65 must be diagnosed.
+	vsri_n_u64(arg_u64x1, arg_u64x1, 1);
+	vsri_n_u64(arg_u64x1, arg_u64x1, 64);
+	vsri_n_u64(arg_u64x1, arg_u64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_u64(arg_u64x1, arg_u64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 1);
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 64);
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_u64(arg_u64x2, arg_u64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsrid_n_u64(arg_u64, arg_u64, 1);
+	vsrid_n_u64(arg_u64, arg_u64, 64);
+	vsrid_n_u64(arg_u64, arg_u64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsrid_n_u64(arg_u64, arg_u64, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p64(poly64x2_t arg_p64x2, poly64x1_t arg_p64x1) { // Shift-immediate bounds for vsri/vsriq on p64: 1 and 64 are valid; 0 and 65 must be diagnosed.
+	vsri_n_p64(arg_p64x1, arg_p64x1, 1);
+	vsri_n_p64(arg_p64x1, arg_p64x1, 64);
+	vsri_n_p64(arg_p64x1, arg_p64x1, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p64(arg_p64x1, arg_p64x1, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 1);
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 64);
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p64(arg_p64x2, arg_p64x2, 65); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8) { // Shift-immediate bounds for vsri/vsriq on p8: 1 and 8 are valid; 0 and 9 must be diagnosed.
+	vsri_n_p8(arg_p8x8, arg_p8x8, 1);
+	vsri_n_p8(arg_p8x8, arg_p8x8, 8);
+	vsri_n_p8(arg_p8x8, arg_p8x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p8(arg_p8x8, arg_p8x8, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 1);
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 8);
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p8(arg_p8x16, arg_p8x16, 9); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_shift_right_and_insert_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8) { // Shift-immediate bounds for vsri/vsriq on p16: 1 and 16 are valid; 0 and 17 must be diagnosed.
+	vsri_n_p16(arg_p16x4, arg_p16x4, 1);
+	vsri_n_p16(arg_p16x4, arg_p16x4, 16);
+	vsri_n_p16(arg_p16x4, arg_p16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsri_n_p16(arg_p16x4, arg_p16x4, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 1);
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 16);
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vsriq_n_p16(arg_p16x8, arg_p16x8, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
new file mode 100644
index 00000000000000..69469fd0d09cdd
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
@@ -0,0 +1,448 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon  -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// s8, s16, s32, and s64 variants are tested under 
+// clang/test/Sema/arm-neon-ranges.c 
+
+void test_store_u8(uint8_t* arg_u8_ptr, uint8x8x2_t arg_u8x8x2, uint8x16_t arg_u8x16,
+				   uint8x16x2_t arg_u8x16x2, uint8x16x4_t arg_u8x16x4, uint8x8_t arg_u8x8,
+				   uint8x16x3_t arg_u8x16x3, uint8x8x4_t arg_u8x8x4, uint8x8x3_t arg_u8x8x3) {
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 0);
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 7);
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 0);
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 15);
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u8(arg_u8_ptr, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 0);
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 7);
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u8(arg_u8_ptr, arg_u8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 0);
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 7);
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u8(arg_u8_ptr, arg_u8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 0);
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 7);
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u8(arg_u8_ptr, arg_u8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 0);
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 15);
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u8(arg_u8_ptr, arg_u8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 0);
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 15);
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u8(arg_u8_ptr, arg_u8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 0);
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 15);
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u8(arg_u8_ptr, arg_u8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_u16(uint16_t* arg_u16_ptr, uint16x8x2_t arg_u16x8x2, uint16x8x3_t arg_u16x8x3,
+					uint16x4x3_t arg_u16x4x3, uint16x4x2_t arg_u16x4x2, uint16x4x4_t arg_u16x4x4,
+					uint16x8x4_t arg_u16x8x4, uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 0);
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 3);
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 0);
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 7);
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u16(arg_u16_ptr, arg_u16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 0);
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 3);
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u16(arg_u16_ptr, arg_u16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 0);
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 7);
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u16(arg_u16_ptr, arg_u16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 0);
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 3);
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u16(arg_u16_ptr, arg_u16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 0);
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 7);
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u16(arg_u16_ptr, arg_u16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 0);
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 3);
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u16(arg_u16_ptr, arg_u16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 0);
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 7);
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u16(arg_u16_ptr, arg_u16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_u32(uint32x2x3_t arg_u32x2x3, uint32_t* arg_u32_ptr, uint32x4x3_t arg_u32x4x3,
+					uint32x4_t arg_u32x4, uint32x2x4_t arg_u32x2x4, uint32x4x4_t arg_u32x4x4,
+					uint32x2_t arg_u32x2, uint32x2x2_t arg_u32x2x2, uint32x4x2_t arg_u32x4x2) {
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 0);
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 1);
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 0);
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 3);
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u32(arg_u32_ptr, arg_u32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 0);
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 1);
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u32(arg_u32_ptr, arg_u32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 0);
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 3);
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u32(arg_u32_ptr, arg_u32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 0);
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 1);
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u32(arg_u32_ptr, arg_u32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 0);
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 3);
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u32(arg_u32_ptr, arg_u32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 0);
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 1);
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u32(arg_u32_ptr, arg_u32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 0);
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 3);
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u32(arg_u32_ptr, arg_u32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2, uint64x2x4_t arg_u64x2x4,
+					uint64x1x4_t arg_u64x1x4, uint64_t* arg_u64_ptr, uint64x1x3_t arg_u64x1x3,
+					uint64x2x2_t arg_u64x2x2, uint64x2x3_t arg_u64x2x3, uint64x1x2_t arg_u64x1x2) {
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 0);
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 1);
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_u64(arg_u64_ptr, arg_u64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, 0);
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_u64(arg_u64_ptr, arg_u64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 0);
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 1);
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_u64(arg_u64_ptr, arg_u64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, 0);
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_u64(arg_u64_ptr, arg_u64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 0);
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 1);
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_u64(arg_u64_ptr, arg_u64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, 0);
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_u64(arg_u64_ptr, arg_u64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 0);
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 1);
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_u64(arg_u64_ptr, arg_u64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_p64(poly64_t* arg_p64_ptr, poly64x1x4_t arg_p64x1x4, poly64x2x3_t arg_p64x2x3,
+					poly64x1x2_t arg_p64x1x2, poly64x2x4_t arg_p64x2x4, poly64x2_t arg_p64x2,
+					poly64x1x3_t arg_p64x1x3, poly64x1_t arg_p64x1, poly64x2x2_t arg_p64x2x2) {
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 0);
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 1);
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p64(arg_p64_ptr, arg_p64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, 0);
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p64(arg_p64_ptr, arg_p64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 0);
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 1);
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p64(arg_p64_ptr, arg_p64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, 0);
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p64(arg_p64_ptr, arg_p64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 0);
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 1);
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p64(arg_p64_ptr, arg_p64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, 0);
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p64(arg_p64_ptr, arg_p64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 0);
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 1);
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p64(arg_p64_ptr, arg_p64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_f16(float16x4x2_t arg_f16x4x2, float16x8x3_t arg_f16x8x3, float16x8_t arg_f16x8,
+					float16x4_t arg_f16x4, float16x4x3_t arg_f16x4x3, float16x8x4_t arg_f16x8x4,
+					float16x8x2_t arg_f16x8x2, float16_t* arg_f16_ptr, float16x4x4_t arg_f16x4x4) {
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 0);
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 3);
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 0);
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 7);
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f16(arg_f16_ptr, arg_f16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 0);
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 3);
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f16(arg_f16_ptr, arg_f16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 0);
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 7);
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f16(arg_f16_ptr, arg_f16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 0);
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 3);
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f16(arg_f16_ptr, arg_f16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 0);
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 7);
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f16(arg_f16_ptr, arg_f16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 0);
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 3);
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f16(arg_f16_ptr, arg_f16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 0);
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 7);
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f16(arg_f16_ptr, arg_f16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_f32(float32x2x4_t arg_f32x2x4, float32x4x4_t arg_f32x4x4, float32x4x3_t arg_f32x4x3,
+					float32x4x2_t arg_f32x4x2, float32_t* arg_f32_ptr, float32x4_t arg_f32x4,
+					float32x2_t arg_f32x2, float32x2x2_t arg_f32x2x2, float32x2x3_t arg_f32x2x3) {
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 0);
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 1);
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 0);
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 3);
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f32(arg_f32_ptr, arg_f32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 0);
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 1);
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f32(arg_f32_ptr, arg_f32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 0);
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 3);
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f32(arg_f32_ptr, arg_f32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 0);
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 1);
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f32(arg_f32_ptr, arg_f32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 0);
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 3);
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f32(arg_f32_ptr, arg_f32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 0);
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 1);
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f32(arg_f32_ptr, arg_f32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 0);
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 3);
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f32(arg_f32_ptr, arg_f32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_p8(poly8x16_t arg_p8x16, poly8x16x4_t arg_p8x16x4, poly8x8_t arg_p8x8,
+				   poly8x16x2_t arg_p8x16x2, poly8x8x4_t arg_p8x8x4, poly8x16x3_t arg_p8x16x3,
+				   poly8_t* arg_p8_ptr, poly8x8x3_t arg_p8x8x3, poly8x8x2_t arg_p8x8x2) {
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 0);
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 7);
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 0);
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 15);
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p8(arg_p8_ptr, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 0);
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 7);
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p8(arg_p8_ptr, arg_p8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 0);
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 7);
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p8(arg_p8_ptr, arg_p8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 0);
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 7);
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p8(arg_p8_ptr, arg_p8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 0);
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 15);
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p8(arg_p8_ptr, arg_p8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 0);
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 15);
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p8(arg_p8_ptr, arg_p8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 0);
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 15);
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p8(arg_p8_ptr, arg_p8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_p16(poly16x4_t arg_p16x4, poly16x4x3_t arg_p16x4x3, poly16x4x2_t arg_p16x4x2,
+					poly16x8x2_t arg_p16x8x2, poly16x8x3_t arg_p16x8x3, poly16_t* arg_p16_ptr,
+					poly16x8x4_t arg_p16x8x4, poly16x8_t arg_p16x8, poly16x4x4_t arg_p16x4x4) {
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 0);
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 3);
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 0);
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 7);
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_p16(arg_p16_ptr, arg_p16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 0);
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 3);
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_p16(arg_p16_ptr, arg_p16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 0);
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 7);
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_p16(arg_p16_ptr, arg_p16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 0);
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 3);
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_p16(arg_p16_ptr, arg_p16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 0);
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 7);
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_p16(arg_p16_ptr, arg_p16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 0);
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 3);
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_p16(arg_p16_ptr, arg_p16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 0);
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 7);
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_p16(arg_p16_ptr, arg_p16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_store_f64(float64x1x2_t arg_f64x1x2, float64x1_t arg_f64x1, float64x2x2_t arg_f64x2x2,
+					float64x1x3_t arg_f64x1x3, float64x2x3_t arg_f64x2x3, float64x2_t arg_f64x2,
+					float64_t* arg_f64_ptr, float64x1x4_t arg_f64x1x4, float64x2x4_t arg_f64x2x4) {
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 0);
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 1);
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_f64(arg_f64_ptr, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, 0);
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	// Differs from current inaccurate ACLE spec -----
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 0);
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 1);
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	// ----------------------------
+	
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 0);
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 0);
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 1);
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_f64(arg_f64_ptr, arg_f64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, 0);
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_f64(arg_f64_ptr, arg_f64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 0);
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 1);
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_f64(arg_f64_ptr, arg_f64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+

>From 630cde553051d55cb68df37af3d14f7af202d6bb Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 10:23:39 +0000
Subject: [PATCH 07/17] Remove redundant immrange flags from NEON instructions

- Changes
	- clang/include/clang/Basic/arm_immcheck_incl.td
	- clang/include/clang/Basic/arm_neon.td
		- Restore immediate range of vcmla{_ROT}_laneq_f16 intrinsics to [0..3]
	- clang/include/clang/Basic/arm_fp16.td
	- clang/include/clang/Basic/arm_neon_incl.td
	- clang/utils/TableGen/NeonEmitter.cpp
		- The flags 'isVXAR', 'isVCVT_N', and 'isLaneQ' serve no justifiable purpose
	          following the work of this patch, and so are removed from NEON instructions.

	- clang/include/clang/Basic/arm_immcheck_incl.td
	- clang/utils/TableGen/SveEmitter.cpp
		- The fields of the ImmCheck tablegen class are also renamed to better explain
		  their purposes.
		- The description of ImmCheckTypes is updated to reflect that we are now sharing
		  them with NEON.
	- clang/test/CodeGen/aarch64-neon-vcmla.c
		- Revert file to main following resolution of vcmla{_ROT}_laneq_f16 range
---
 clang/include/clang/Basic/arm_fp16.td         |   5 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |  12 +-
 clang/include/clang/Basic/arm_neon.td         | 271 ++------
 clang/include/clang/Basic/arm_neon_incl.td    |   6 -
 clang/test/CodeGen/aarch64-neon-vcmla.c       | 624 +++++++-----------
 clang/utils/TableGen/NeonEmitter.cpp          |   9 +-
 clang/utils/TableGen/SveEmitter.cpp           |  14 +-
 7 files changed, 323 insertions(+), 618 deletions(-)

diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index 81d257fc73033e..ed26e84af075ed 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -76,12 +76,13 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "(1U)1", "Sh">;
   def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
-  let isVCVT_N = 1, ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
+  let ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
     def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
     def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
     def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
-    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">;
   }
+    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh",
+                                [ImmCheck<1, ImmCheckCvt, 0>]>;
     def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh",
                                 [ImmCheck<1, ImmCheckCvt, 0>]>;
     def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh",
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
index 88440532799839..c6a7ca26855496 100644
--- a/clang/include/clang/Basic/arm_immcheck_incl.td
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -10,10 +10,10 @@ def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
 def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
 def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
 def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
-def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(sizeinbits(vec)/(sizeinbits(elt)) - 1)
 def ImmCheckCvt                 : ImmCheckType<8>;  // 1..sizeinbits(elt) (same as ShiftRight)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexCompRotate : ImmCheckType<9>;  // 0..(sizeinbits(vec)/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot        : ImmCheckType<10>; // 0..(sizeinbits(vec)/(4*sizeinbits(elt)) - 1)
 def ImmCheckComplexRot90_270    : ImmCheckType<11>; // [90,270]
 def ImmCheckComplexRotAll90     : ImmCheckType<12>; // [0, 90, 180,270]
 def ImmCheck0_13                : ImmCheckType<13>; // 0..13
@@ -31,9 +31,9 @@ def ImmCheck1_32                : ImmCheckType<24>; // 1..32
 def ImmCheck1_64                : ImmCheckType<25>; // 1..64
 def ImmCheck0_63                : ImmCheckType<26>; // 0..63
 
-class ImmCheck<int arg, ImmCheckType kind, int typeArg = -1> {
-  int Arg = arg;
+class ImmCheck<int immArgIdx, ImmCheckType kind, int typeArgIdx = -1> {
+  int ImmArgIdx = immArgIdx;
   // The index of the argument whose type should be referred to when validating this immedaite.
-  int TypeContextArg = typeArg;
+  int TypeContextArgIdx = typeArgIdx;
   ImmCheckType Kind = kind;
 }
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 2752e540207114..7e5b965fbe4b0c 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -288,16 +288,13 @@ def SPLAT  : WInst<"splat_lane", ".(!q)I",
                     [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
                    "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
-                   [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-  let isLaneQ = 1;
-}
+                   [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+
 let TargetGuard = "bf16,neon" in {
   def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb", 
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", 
-                      [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-    let isLaneQ = 1;
-  }
+                      [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -612,14 +609,12 @@ let ArchGuard = "(__ARM_FP & 2)" in {
 def VCVT_S32     : SInst<"vcvt_s32", "S.",  "fQf">;
 def VCVT_U32     : SInst<"vcvt_u32", "U.",  "fQf">;
 def VCVT_F32     : SInst<"vcvt_f32", "F(.!)",  "iUiQiQUi">;
-let isVCVT_N = 1 in {
 def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf", 
                         [ImmCheck<1, ImmCheck1_32>]>;
 def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf", 
                         [ImmCheck<1, ImmCheck1_32>]>;
 def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi", 
                         [ImmCheck<1, ImmCheck1_32>]>;
-}
 
 def VMOVN        : IInst<"vmovn", "<Q",  "silUsUiUl">;
 def VMOVL        : SInst<"vmovl", "(>Q).",  "csiUcUsUi">;
@@ -990,14 +985,12 @@ def QRSHRN_HIGH_N  : SOpInst<"vqrshrn_high_n", "<(<q).I",
 // Converting vectors
 def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
-let isVCVT_N = 1 in {
 def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl", 
                         [ImmCheck<1, ImmCheck1_64>]>;
 def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd", 
                         [ImmCheck<1, ImmCheck1_64>]>;
 def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd", 
                         [ImmCheck<1, ImmCheck1_64>]>;
-}
 
 ////////////////////////////////////////////////////////////////////////////////
 // 3VDiff class using high 64-bit in operands
@@ -1044,22 +1037,16 @@ def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
 def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
                         "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
 def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI",
-                     "csilPcPsPlUcUsUiUlfd", OP_COPY_LN> {
-  let isLaneQ = 1;
-}
+                     "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>;
 def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
-                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN> {
-  let isLaneQ = 1;
-}
+                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Set all lanes to same value
 def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
 def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
                   "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
-                        OP_DUP_LN> {
-  let isLaneQ = 1;
-}
+                        OP_DUP_LN>;
 def DUP_N   : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>;
 def MOV_N   : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>;
 
@@ -1075,62 +1062,36 @@ def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> {
 ////////////////////////////////////////////////////////////////////////////////
 
 def VMLA_LANEQ   : IOpInst<"vmla_laneq", "...QI",
-                           "siUsUifQsQiQUsQUiQf", OP_MLA_LN> {
-  let isLaneQ = 1;
-}
+                           "siUsUifQsQiQUsQUiQf", OP_MLA_LN>;
 def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
-                           "siUsUifQsQiQUsQUiQf", OP_MLS_LN> {
-  let isLaneQ = 1;
-}
-
+                           "siUsUifQsQiQUsQUiQf", OP_MLS_LN>;
 def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd", 
                         [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd", 
-                        [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
+                        [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
-def VFMS_LANEQ   : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ> {
-  let isLaneQ = 1;
-}
+def VFMS_LANEQ   : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>;
 
-def VMLAL_LANEQ  : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN> {
-  let isLaneQ = 1;
-}
+def VMLAL_LANEQ  : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN>;
 def VMLAL_HIGH_LANE   : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                                 OP_MLALHi_LN>;
 def VMLAL_HIGH_LANEQ  : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
-                                OP_MLALHi_LN> {
-  let isLaneQ = 1;
-}
-def VMLSL_LANEQ  : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MLALHi_LN>;
+def VMLSL_LANEQ  : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN>;
 def VMLSL_HIGH_LANE   : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                                 OP_MLSLHi_LN>;
 def VMLSL_HIGH_LANEQ  : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
-                                OP_MLSLHi_LN> {
-  let isLaneQ = 1;
-}
-
-def VQDMLAL_LANEQ  : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MLSLHi_LN>;
+def VQDMLAL_LANEQ  : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN>;
 def VQDMLAL_HIGH_LANE   : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si",
                                 OP_QDMLALHi_LN>;
 def VQDMLAL_HIGH_LANEQ  : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si",
-                                OP_QDMLALHi_LN> {
-  let isLaneQ = 1;
-}
-def VQDMLSL_LANEQ  : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_QDMLALHi_LN>;
+def VQDMLSL_LANEQ  : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN>;
 def VQDMLSL_HIGH_LANE   : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si",
                                 OP_QDMLSLHi_LN>;
 def VQDMLSL_HIGH_LANEQ  : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si",
-                                OP_QDMLSLHi_LN> {
-  let isLaneQ = 1;
-}
+                                OP_QDMLSLHi_LN>;
 
 // Newly add double parameter for vmul_lane in aarch64
 // Note: d type is handled by SCALAR_VMUL_LANE
@@ -1138,50 +1099,31 @@ def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>;
 
 // Note: d type is handled by SCALAR_VMUL_LANEQ
 def VMUL_LANEQ   : IOpInst<"vmul_laneq", "..QI",
-                           "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN> {
-  let isLaneQ = 1;
-}
-def VMULL_LANEQ  : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN> {
-  let isLaneQ = 1;
-}
+                           "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>;
+def VMULL_LANEQ  : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN>;
 def VMULL_HIGH_LANE   : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi",
                                 OP_MULLHi_LN>;
 def VMULL_HIGH_LANEQ  : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi",
-                                OP_MULLHi_LN> {
-  let isLaneQ = 1;
-}
-
-def VQDMULL_LANEQ  : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN> {
-  let isLaneQ = 1;
-}
+                                OP_MULLHi_LN>;
+def VQDMULL_LANEQ  : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN>;
 def VQDMULL_HIGH_LANE   : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si",
                                   OP_QDMULLHi_LN>;
 def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
-                                  OP_QDMULLHi_LN> {
-  let isLaneQ = 1;
-}
-
-let isLaneQ = 1 in {
+                                  OP_QDMULLHi_LN>;
 def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-}
+
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
-def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
-  let isLaneQ = 1;
-}
-def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> {
-  let isLaneQ = 1;
-}
+def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN>;
+def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN>;
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a"
 
 // Note: d type implemented by SCALAR_VMULX_LANE
 def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>;
 // Note: d type is implemented by SCALAR_VMULX_LANEQ
-def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN> {
-  let isLaneQ = 1;
-}
+def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Across vectors class
@@ -1224,11 +1166,8 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "s
 def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def RAX1 : SInst<"vrax1", "...", "QUl">;
-
-let isVXAR = 1 in {
 def XAR :  SInst<"vxar", "...I", "QUl", [ImmCheck<2, ImmCheck0_63>]>;
 }
-}
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3,neon" in {
 def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
@@ -1658,15 +1597,11 @@ def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">;
 
 // Scalar Floating Point  multiply (scalar, by element)
 def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>;
-def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN>;
 
 // Scalar Floating Point  multiply extended (scalar, by element)
 def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>;
-def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN>;
 
 def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
@@ -1676,86 +1611,58 @@ def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d",
 
 // VMUL_LANEQ d type implemented using scalar mul lane
 def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
-                              [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
-  let isLaneQ = 1;
-}
-
+                              [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 // VMULX_LANE d type implemented using scalar vmulx_lane
 def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
 
 // VMULX_LANEQ d type implemented using scalar vmulx_laneq
-def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>;
 // Scalar Floating Point fused multiply-add (scalar, by element)
 def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
-                            [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
+                            [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Scalar Floating Point fused multiply-subtract (scalar, by element)
 def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>;
-def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ> {
-  let isLaneQ = 1;
-}
+def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ>;
 
 // Signed Saturating Doubling Multiply Long (scalar by element)
 def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>;
-def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>;
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
 def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
-                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
 def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
                               [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
 def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
-                              [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-  let isLaneQ = 1;
-}
-
+                              [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 // Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
 def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>;
-def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN>;
 
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
 def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>;
-def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN>;
 
 let TargetGuard = "v8.1a,neon" in {
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
-def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN> {
-  let isLaneQ = 1;
-}
-
+def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN>;
 // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
 def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>;
-def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN> {
-  let isLaneQ = 1;
-}
+def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
 } // TargetGuard = "v8.1a"
 
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
-                            [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-  let isLaneQ = 1;
-}
+                            [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
 
@@ -1825,14 +1732,12 @@ let TargetGuard = "fullfp16,neon" in {
     def VCLTH      : SOpInst<"vclt", "U..", "hQh", OP_LT>;
 
   // Vector conversion
-  let isVCVT_N = 1 in {
     def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs", 
                           [ImmCheck<1, ImmCheck1_16>]>;
     def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh", 
                           [ImmCheck<1, ImmCheck1_16>]>;
     def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh", 
                           [ImmCheck<1, ImmCheck1_16>]>;
-  }
 
   // Max/Min
   def VMAXH         : SInst<"vmax", "...", "hQh">;
@@ -1913,9 +1818,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
-                          [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-    let isLaneQ = 1;
-  }
+                          [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMA lane with scalar argument
   def FMLA_NH      : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
@@ -1923,47 +1826,31 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
   def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
-                                [ImmCheck<3, ImmCheckLaneIndex, 2>]> {
-    let isLaneQ = 1;
-  }
+                                [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMS lane
   def VFMS_LANEH   : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>;
-  def VFMS_LANEQH  : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ> {
-    let isLaneQ = 1;
-  }
+  def VFMS_LANEQH  : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ>;
   // FMS lane with scalar argument
   def FMLS_NH      : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>;
   // Scalar floating foint fused multiply-subtract (scalar, by element)
   def SCALAR_FMLS_LANEH  : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>;
-  def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ> {
-    let isLaneQ = 1;
-  }
-
+  def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ>;
   // Mul lane
-  def VMUL_LANEQH   : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN> {
-    let isLaneQ = 1;
-  }
+  def VMUL_LANEQH   : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN>;
   // Scalar floating point  multiply (scalar, by element)
   def SCALAR_FMUL_LANEH  : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>;
-  def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN> {
-    let isLaneQ = 1;
-  }
+  def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN>;
 
   // Mulx lane
   def VMULX_LANEH   : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>;
-  def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN> {
-    let isLaneQ = 1;
-  }
+  def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>;
   def VMULX_NH      : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
   // Scalar floating point  mulx (scalar, by element)
   def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
-                                [ImmCheck<2, ImmCheckLaneIndex, 1>]> {
-    let isLaneQ = 1;
-  }
-
+                                [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   // ARMv8.2-A FP16 reduction vector intrinsics.
   def VMAXVH   : SInst<"vmaxv", "1.", "hQh">;
   def VMINVH   : SInst<"vminv", "1.", "hQh">;
@@ -1983,9 +1870,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh", 
                                 [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh", 
-                                [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-    let isLaneQ = 1;
-  }
+                                [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
 // v8.2-A dot product instructions.
@@ -1995,9 +1880,7 @@ let TargetGuard = "dotprod,neon" in {
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod,neon" in {
   // Variants indexing into a 128-bit vector are A64 only.
-  def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ> {
-    let isLaneQ = 1;
-  }
+  def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ>;
 }
 
 // v8.2-A FP16 fused multiply-add long instructions.
@@ -2012,18 +1895,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>;
   def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>;
 
-  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN> {
-    let isLaneQ = 1;
-  }
-  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN> {
-    let isLaneQ = 1;
-  }
-  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi> {
-    let isLaneQ = 1;
-  }
-  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi> {
-    let isLaneQ = 1;
-  }
+  def VFMLAL_LANEQ_LOW  : SOpInst<"vfmlal_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN>;
+  def VFMLSL_LANEQ_LOW  : SOpInst<"vfmlsl_laneq_low",  "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN>;
+  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>;
+  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>;
 }
 
 let TargetGuard = "i8mm,neon" in {
@@ -2036,19 +1911,15 @@ let TargetGuard = "i8mm,neon" in {
   def VSUDOT_LANE  : SOpInst<"vsudot_lane", "..(<<)(<<qU)I", "iQi", OP_SUDOT_LN>;
 
   let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-    let isLaneQ = 1 in {
-      def VUSDOT_LANEQ  : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
-      def VSUDOT_LANEQ  : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
-    }
+    def VUSDOT_LANEQ  : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
+    def VSUDOT_LANEQ  : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
   }
 }
 
 let TargetGuard = "bf16,neon" in {
   def VDOT_BF : SInst<"vbfdot", "..BB", "fQf">;
   def VDOT_LANE_BF : SOpInst<"vbfdot_lane", "..B(Bq)I", "fQf", OP_BFDOT_LN>;
-  def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ> {
-    let isLaneQ = 1;
-  }
+  def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ>;
 
   def VFMMLA_BF : SInst<"vbfmmla", "..BB", "Qf">;
 
@@ -2074,17 +1945,13 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
 
-    let isLaneQ = 1 in  {
-      // ACLE specifies that the f16 variant of vcmla_#ROT_laneq has an immediate range 0<=lane<=1,
-      // whereas the f16 variant of vcmlaq_#ROT_laneq has an immediate range 0<=lane<=3.
-      // f16 is the only type for which these two differ.
-      defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
-      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
-                (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>;
-      // vcmlaq{ROT}_laneq
-      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
-             (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-    }
+    // vcmla{ROT}_laneq
+    def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
+            (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
+    // vcmlaq{ROT}_laneq
+    def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
+            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
+
   }
 }
 
@@ -2121,9 +1988,7 @@ let TargetGuard = "bf16,neon" in {
   def VDUP_N_BF    : WOpInst<"vdup_n", ".1", "bQb", OP_DUP>;
 
   def VDUP_LANE_BF : WOpInst<"vdup_lane", ".qI", "bQb", OP_DUP_LN>;
-  def VDUP_LANEQ_BF: WOpInst<"vdup_laneq", ".QI", "bQb", OP_DUP_LN> {
-    let isLaneQ = 1;
-  }
+  def VDUP_LANEQ_BF: WOpInst<"vdup_laneq", ".QI", "bQb", OP_DUP_LN>;
 
   def VCOMBINE_BF  : NoTestOpInst<"vcombine", "Q..", "b", OP_CONC>;
 
@@ -2137,9 +2002,7 @@ let TargetGuard = "bf16,neon" in {
   def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
   def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
-                          [ImmCheck<1, ImmCheckLaneIndex, 0>]> {
-    let isLaneQ = 1;
-  }
+                          [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
   def VLD1_BF : WInst<"vld1", ".(c*!)", "bQb">;
   def VLD2_BF : WInst<"vld2", "2(c*!)", "bQb">;
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index 2b5acd41e7bbd4..b088e0794cdea3 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -274,12 +274,6 @@ class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
   bit isShift = 0;
   bit isScalarShift = 0;
   bit isScalarNarrowShift = 0;
-  bit isVCVT_N = 0;
-  bit isVXAR = 0;
-  // For immediate checks: the immediate will be assumed to specify the lane of
-  // a Q register. Only used for intrinsics which end up calling polymorphic
-  // builtins.
-  bit isLaneQ = 0;
   list<ImmCheck> ImmChecks = ch;
 
   // Certain intrinsics have different names than their representative
diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index d82d74d019c012..02171527cc6a32 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -1,596 +1,444 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon  \
-// RUN:            -target-feature +v8.3a -target-feature +fullfp16 \
-// RUN:            -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon \
+// RUN:        -target-feature +v8.3a \
+// RUN:        -target-feature +fullfp16 \
+// RUN:        -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
 
 // REQUIRES: aarch64-registered-target
 
 #include <arm_neon.h>
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT90_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot90_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT180_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot180_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_f16(
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_f16(
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_f16(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_f32(
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_f32(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64(
-// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT:    ret <2 x double> [[VCMLAQ_ROT270_F643_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_f64(
+// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
+// CHECK: ret <2 x double> [[RES]]
 float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
   return vcmlaq_rot270_f64(acc, lhs, rhs);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_laneq_f16(acc, lhs, rhs, 1);
+  return vcmla_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT90_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot90_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
+  return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot90_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT90_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT180_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
+  return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot180_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT180_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
   return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16(
-// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
-// CHECK-NEXT:    ret <4 x half> [[VCMLA_ROT270_F163_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
+// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
+// CHECK: ret <4 x half> [[RES]]
 float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
-  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
+  return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
   return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16(
-// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
-// CHECK-NEXT:    [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f16(
+// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
+// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
+// CHECK: ret <8 x half> [[RES]]
 float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
   return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmla_rot270_lane_f32(
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
   return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32(
-// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
-// CHECK-NEXT:    [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
-// CHECK-NEXT:    [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT:    ret <2 x float> [[VCMLA_ROT270_F323_I]]
-//
+// ACLE says this exists, but it won't map to a single instruction if lane > 1.
+// CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
+// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
+// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
+// CHECK: ret <2 x float> [[RES]]
 float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
   return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_lane_f32(
+// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
+// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
+// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
   return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0);
 }
 
-// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32(
-// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK-NEXT:    [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
-//
+// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f32(
+// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
+// CHECK: ret <4 x float> [[RES]]
 float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
   return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1);
 }
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 2330448d9ce5ed..95ec53ce1081a6 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -413,11 +413,11 @@ class Intrinsic {
     for (const auto *I : ImmCheckList) {
       unsigned EltSizeInBits = 0, VecSizeInBits = 0;
 
-      ArgIdx = I->getValueAsInt("Arg");
-      TypeArgIdx = I->getValueAsInt("TypeContextArg");
+      ArgIdx = I->getValueAsInt("ImmArgIdx");
+      TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
       Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
 
-      assert((ArgIdx >= 0 && Kind >= 0) && "Arg and Kind must be nonnegative");
+      assert((ArgIdx >= 0 && Kind >= 0) && "ImmArgIdx and Kind must be nonnegative");
 
       if (TypeArgIdx >= 0) {
         EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
@@ -1492,7 +1492,7 @@ Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
     N = emitDagArg(DI->getArg(0), "").second;
   std::optional<std::string> MangledName;
   if (MatchMangledName) {
-    if (Intr.getRecord()->getValueAsBit("isLaneQ"))
+    if(Intr.getRecord()->getValueAsString("Name").ends_with("laneq"))
       N += "q";
     MangledName = Intr.mangleName(N, ClassS);
   }
@@ -1975,7 +1975,6 @@ void NeonEmitter::createIntrinsic(Record *R,
   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
   std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
-
   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
 
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index a47e16a28fcd68..634c9daffa7544 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -372,7 +372,7 @@ class SVEEmitter {
   /// Emit all the range checks for the immediates.
   void createRangeChecks(raw_ostream &o);
 
-  // Emit all the ImmCheckTypes to arm_immcheck_types.h
+  // Emit all the ImmCheckTypes to arm_immcheck_types.inc
   void createImmCheckTypes(raw_ostream &OS);
 
   /// Create the SVETypeFlags used in CGBuiltins
@@ -1196,18 +1196,18 @@ void SVEEmitter::createIntrinsic(
     // Collate a list of range/option checks for the immediates.
     SmallVector<ImmCheck, 2> ImmChecks;
     for (auto *R : ImmCheckList) {
-      int64_t Arg = R->getValueAsInt("Arg");
-      int64_t EltSizeArg = R->getValueAsInt("TypeContextArg");
+      int64_t ArgIdx = R->getValueAsInt("ImmArgIdx");
+      int64_t EltSizeArgIdx = R->getValueAsInt("TypeContextArgIdx");
       int64_t Kind = R->getValueAsDef("Kind")->getValueAsInt("Value");
-      assert(Arg >= 0 && Kind >= 0 && "Arg and Kind must be nonnegative");
+      assert(ArgIdx >= 0 && Kind >= 0 && "ImmArgIdx and Kind must be nonnegative");
 
       unsigned ElementSizeInBits = 0;
       char Mod;
       unsigned NumVectors;
-      std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArg + 1);
-      if (EltSizeArg >= 0)
+      std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArgIdx + 1);
+      if (EltSizeArgIdx >= 0)
         ElementSizeInBits = SVEType(TS, Mod, NumVectors).getElementSizeInBits();
-      ImmChecks.push_back(ImmCheck(Arg, Kind, ElementSizeInBits));
+      ImmChecks.push_back(ImmCheck(ArgIdx, Kind, ElementSizeInBits));
     }
 
     Out.push_back(std::make_unique<Intrinsic>(

>From 7990b20360406f150112d6ea392c0297d031ef6b Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 10:49:41 +0000
Subject: [PATCH 08/17] Update and move vcmla immediate range tests

	- update tests for vcmla{_ROT}_laneq_f16 after immediate range resolution
	- move test file to clang/test/Sema/aarch64-neon-immediate-ranges
	- remove trailing whitespace from test lines
---
 .../vcmla.c}                                  | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)
 rename clang/test/Sema/{aarch64-neon-vcmla-ranges.c => aarch64-neon-immediate-ranges/vcmla.c} (93%)

diff --git a/clang/test/Sema/aarch64-neon-vcmla-ranges.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
similarity index 93%
rename from clang/test/Sema/aarch64-neon-vcmla-ranges.c
rename to clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
index 9b42e68670da08..21c24975b38b37 100644
--- a/clang/test/Sema/aarch64-neon-vcmla-ranges.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vcmla.c
@@ -15,17 +15,18 @@ void test_vcmla_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 void test_vcmla_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_laneq_f16(a, b, c, 0);
   vcmla_laneq_f16(a, b, c, 1);
+  vcmla_laneq_f16(a, b, c, 3);
 
-  vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
-  vcmla_laneq_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c){
   vcmlaq_lane_f16(a, b, c, 0);
   vcmlaq_lane_f16(a, b, c, 1);
 
-  vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
-  vcmlaq_lane_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_lane_f16(a, b, c, 2);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
@@ -33,8 +34,8 @@ void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
   vcmlaq_laneq_f16(a, b, c, 1);
   vcmlaq_laneq_f16(a, b, c, 3);
 
-  vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
-  vcmlaq_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}} 
+  vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_laneq_f16(a, b, c, 4);  // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
@@ -70,10 +71,10 @@ void test_vcmla_rot90_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 
 void test_vcmla_rot90_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_rot90_laneq_f16(a, b, c, 0);
-  vcmla_rot90_laneq_f16(a, b, c, 1);
+  vcmla_rot90_laneq_f16(a, b, c, 3);
 
   vcmla_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
-  vcmla_rot90_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot90_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_rot90_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
@@ -94,10 +95,10 @@ void test_vcmla_rot180_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 
 void test_vcmla_rot180_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_rot180_laneq_f16(a, b, c, 0);
-  vcmla_rot180_laneq_f16(a, b, c, 1);
+  vcmla_rot180_laneq_f16(a, b, c, 3);
 
   vcmla_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
-  vcmla_rot180_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot180_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_rot180_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
@@ -118,10 +119,10 @@ void test_vcmla_rot270_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
 
 void test_vcmla_rot270_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
   vcmla_rot270_laneq_f16(a, b, c, 0);
-  vcmla_rot270_laneq_f16(a, b, c, 1);
+  vcmla_rot270_laneq_f16(a, b, c, 3);
 
   vcmla_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
-  vcmla_rot270_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmla_rot270_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
 }
 
 void test_vcmlaq_rot270_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){

>From df0672b095058b2dcdccc583e6e8a44858214657 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 11:31:17 +0000
Subject: [PATCH 09/17] [Fixup] tablegen command descr and SemaArm

---
 clang/include/clang/Sema/SemaARM.h | 1 -
 clang/lib/Sema/SemaARM.cpp         | 3 ++-
 clang/utils/TableGen/TableGen.cpp  | 7 ++++---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index ebeb82e9455ba9..c431b1182ce8bc 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -15,7 +15,6 @@
 
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/Expr.h"
-#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/SemaBase.h"
 #include "llvm/ADT/StringRef.h"
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 3dfeb1bd02adeb..4ab288e2804f41 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -12,6 +12,7 @@
 
 #include "clang/Sema/SemaARM.h"
 #include "clang/Basic/DiagnosticSema.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
@@ -447,7 +448,7 @@ bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
     break;
   case ImmCheckType::ImmCheckLaneIndex:
     if (SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
-                                        (VecBitWidth / (1 * EltBitWidth)) - 1))
+                                        (VecBitWidth / EltBitWidth) - 1))
       return true;
     break;
   case ImmCheckType::ImmCheckLaneIndexCompRotate:
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index dab447ff7d944f..84afd4c0afb269 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -235,9 +235,10 @@ cl::opt<ActionType> Action(
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
                    "Generate ARM NEON tests for clang"),
-        clEnumValN(GenArmImmCheckTypes, "gen-arm-immcheck-types",
-                   "Generate arm_immchecktypes.h (immediate range check types)"
-                   " for clang"),
+        clEnumValN(
+            GenArmImmCheckTypes, "gen-arm-immcheck-types",
+            "Generate arm_immcheck_types.inc (immediate range check types)"
+            " for clang"),
         clEnumValN(GenArmSveHeader, "gen-arm-sve-header",
                    "Generate arm_sve.h for clang"),
         clEnumValN(GenArmSveBuiltins, "gen-arm-sve-builtins",

>From 1c66ace702eb21344db80926a20e9f2254a6b918 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 21 Aug 2024 18:20:42 +0000
Subject: [PATCH 10/17] Add check to ensure builtin range is not redefined

- Changes
	- llvm/include/llvm/TableGen/AArch64ImmCheck.h
		- Add TypeArgIdx field for ImmCheck (third argument in tablegen)
		- introduce equality operator for ImmCheck
		- rename 'Arg' to 'ImmArgIdx' for consistency with the tablegen class.
	- clang/utils/TableGen/NeonEmitter.cpp
		- Add assertion to ensure that a neon builtin's range is not
		  redefined. This problem could arise when two intrinsics share the same builtin
		  but define different immediate checks in tablegen (range checking is done via
		  the builtin rather than the intrinsic).
	- clang/include/clang/Basic/arm_neon.td
		- ensure consistent TypeContextArg is used for vsri/vsli intrinsics.
---
 clang/include/clang/Basic/arm_neon.td        |  8 ++--
 clang/utils/TableGen/NeonEmitter.cpp         | 39 +++++++++++++-------
 clang/utils/TableGen/SveEmitter.cpp          | 10 +++--
 llvm/include/llvm/TableGen/AArch64ImmCheck.h | 18 ++++++---
 4 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 7e5b965fbe4b0c..09d9b88637abd6 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -449,10 +449,10 @@ def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi",
 // E.3.13 Shifts with insert
 def VSRI_N : WInst<"vsri_n", "...I",
                    "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
-                    [ImmCheck<2, ImmCheckShiftRight>]>;
+                    [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 def VSLI_N : WInst<"vsli_n", "...I",
                    "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
-                   [ImmCheck<2, ImmCheckShiftLeft>]>;
+                   [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -963,8 +963,8 @@ def SHLL_HIGH_N    : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
                              OP_LONG_HI>;
 
 ////////////////////////////////////////////////////////////////////////////////
-def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 1>]>;
-def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 1>]>;
+def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 0>]>;
+def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 // Right shift narrow high
 def SHRN_HIGH_N    : IOpInst<"vshrn_high_n", "<(<q).I",
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 95ec53ce1081a6..2feb1eca11ebb9 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -425,8 +425,13 @@ class Intrinsic {
       }
 
       ImmChecks.emplace_back(
-          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits));
+          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits, TypeArgIdx));
     }
+    llvm::sort(ImmChecks.begin(), ImmChecks.end(),
+               [](const ImmCheck &a, const ImmCheck &b) {
+                 return a.getImmArgIdx() < b.getImmArgIdx();
+               }); // Sort for comparison with other intrinsics which map to the
+                   // same builtin
   }
 
   /// Get the Record that this intrinsic is based off.
@@ -2167,27 +2172,33 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
 
 void NeonEmitter::genIntrinsicRangeCheckCode(
     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
-  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
-  // Ensure these are only emitted once.
-  std::set<std::string> Emitted;
+  std::map<std::string, ArrayRef<ImmCheck>> Emitted;
 
+  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   for (auto &Def : Defs) {
-    if (Emitted.find(Def->getMangledName()) != Emitted.end() ||
-        !Def->hasImmediate())
-      continue;
-
     // If the Def has a body (operation DAGs), it is not a __builtin_neon_
-    if (Def->hasBody())
+    if (Def->hasBody() || !Def->hasImmediate())
       continue;
 
+    // Sorted by immediate argument index
+    ArrayRef<ImmCheck> Checks = Def->getImmChecks();
+
+    const auto it = Emitted.find(Def->getMangledName());
+    if (it != Emitted.end()) {
+      assert(it->second.equals(Checks) &&
+             "Neon builtin's immediate range checks cannot be redefined.");
+      continue; // Ensure this is emitted only once
+    }
+
+    // Emit builtin's range checks
     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
-    for (const auto &Check : Def->getImmChecks()) {
-      OS << " ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
-         << Check.getVecSizeInBits() << "));\n"
+    for (const auto &Check : Checks) {
+      OS << " ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
+         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
+         << ", " << Check.getVecSizeInBits() << "));\n"
          << " break;\n";
     }
-    Emitted.insert(Def->getMangledName());
+    Emitted[Def->getMangledName()] = Checks;
   }
 
   OS << "#endif\n\n";
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 634c9daffa7544..b86c8ca24883c2 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1527,8 +1527,9 @@ void SVEEmitter::createRangeChecks(raw_ostream &OS) {
 
     OS << "case SVE::BI__builtin_sve_" << Def->getMangledName() << ":\n";
     for (auto &Check : Def->getImmChecks())
-      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
+         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
+         << "));\n";
     OS << "  break;\n";
 
     Emitted.insert(Def->getMangledName());
@@ -1722,8 +1723,9 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
 
     OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n";
     for (auto &Check : Def->getImmChecks())
-      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
-         << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
+         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
+         << "));\n";
     OS << "  break;\n";
 
     Emitted.insert(Def->getMangledName());
diff --git a/llvm/include/llvm/TableGen/AArch64ImmCheck.h b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
index ff8c4a1865ea34..16d88142d038ad 100644
--- a/llvm/include/llvm/TableGen/AArch64ImmCheck.h
+++ b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
@@ -15,20 +15,26 @@
 #define AARCH64_IMMCHECK_H
 
 class ImmCheck {
-  unsigned Arg;
+  int ImmArgIdx;
   unsigned Kind;
   unsigned ElementSizeInBits;
   unsigned VecSizeInBits;
+  int TypeArgIdx;
 
 public:
-  ImmCheck(unsigned Arg, unsigned Kind, unsigned ElementSizeInBits = 0,
-           unsigned VecSizeInBits = 128)
-      : Arg(Arg), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
-        VecSizeInBits(VecSizeInBits) {}
+  ImmCheck(int ImmArgIdx, unsigned Kind, unsigned ElementSizeInBits = 0,
+           unsigned VecSizeInBits = 128, int TypeArgIdx = -1)
+      : ImmArgIdx(ImmArgIdx), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
+        VecSizeInBits(VecSizeInBits), TypeArgIdx(TypeArgIdx) {}
   ImmCheck(const ImmCheck &Other) = default;
   ~ImmCheck() = default;
 
-  unsigned getArg() const { return Arg; }
+  bool operator==(const ImmCheck &other) const {
+    return other.getImmArgIdx() == ImmArgIdx && other.getKind() == Kind &&
+           other.getTypeArgIdx() == TypeArgIdx;
+  }
+  int getImmArgIdx() const { return ImmArgIdx; }
+  int getTypeArgIdx() const { return TypeArgIdx; }
   unsigned getKind() const { return Kind; }
   unsigned getElementSizeInBits() const { return ElementSizeInBits; }
   unsigned getVecSizeInBits() const { return VecSizeInBits; }

>From c913d72280748a3dc03754b41a4bb7b4a463d38c Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 22 Aug 2024 08:51:18 +0000
Subject: [PATCH 11/17] [NFC] fix format

---
 clang/utils/TableGen/NeonEmitter.cpp | 4 ++--
 clang/utils/TableGen/SveEmitter.cpp  | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 2feb1eca11ebb9..9f8a88dfed102c 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -416,8 +416,8 @@ class Intrinsic {
       ArgIdx = I->getValueAsInt("ImmArgIdx");
       TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
       Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
-
-      assert((ArgIdx >= 0 && Kind >= 0) && "ImmArgIdx and Kind must be nonnegative");
+      assert((ArgIdx >= 0 && Kind >= 0) &&
+             "ImmArgIdx and Kind must be nonnegative");
 
       if (TypeArgIdx >= 0) {
         EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b86c8ca24883c2..bb7de00a7a1ba9 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1199,7 +1199,8 @@ void SVEEmitter::createIntrinsic(
       int64_t ArgIdx = R->getValueAsInt("ImmArgIdx");
       int64_t EltSizeArgIdx = R->getValueAsInt("TypeContextArgIdx");
       int64_t Kind = R->getValueAsDef("Kind")->getValueAsInt("Value");
-      assert(ArgIdx >= 0 && Kind >= 0 && "ImmArgIdx and Kind must be nonnegative");
+      assert(ArgIdx >= 0 && Kind >= 0 &&
+             "ImmArgIdx and Kind must be nonnegative");
 
       unsigned ElementSizeInBits = 0;
       char Mod;

>From dc718453fbdaaf12e0893214ae274211ff4e7f90 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 22 Aug 2024 16:28:09 +0000
Subject: [PATCH 12/17] Fix builtin range-check compatibility

---
 clang/utils/TableGen/NeonEmitter.cpp         | 46 +++++++++++++++++---
 llvm/include/llvm/TableGen/AArch64ImmCheck.h | 10 +----
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 9f8a88dfed102c..34a3739001ee71 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -48,6 +48,7 @@
 #include <set>
 #include <sstream>
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -416,16 +417,23 @@ class Intrinsic {
       ArgIdx = I->getValueAsInt("ImmArgIdx");
       TypeArgIdx = I->getValueAsInt("TypeContextArgIdx");
       Kind = I->getValueAsDef("Kind")->getValueAsInt("Value");
+
       assert((ArgIdx >= 0 && Kind >= 0) &&
              "ImmArgIdx and Kind must be nonnegative");
 
       if (TypeArgIdx >= 0) {
-        EltSizeInBits = getParamType(TypeArgIdx).getElementSizeInBits();
-        VecSizeInBits = getParamType(TypeArgIdx).getSizeInBits();
+        Type ContextType = getParamType(TypeArgIdx);
+
+        // Element size cannot be set for intrinscs that map to polymorphic
+        // builtins.
+        if (CK != ClassB)
+          EltSizeInBits = ContextType.getElementSizeInBits();
+
+        VecSizeInBits = ContextType.getSizeInBits();
       }
 
       ImmChecks.emplace_back(
-          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits, TypeArgIdx));
+          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits));
     }
     llvm::sort(ImmChecks.begin(), ImmChecks.end(),
                [](const ImmCheck &a, const ImmCheck &b) {
@@ -581,6 +589,8 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
+  bool areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+                                const ArrayRef<ImmCheck> ChecksB);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
 
@@ -1497,7 +1507,7 @@ Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
     N = emitDagArg(DI->getArg(0), "").second;
   std::optional<std::string> MangledName;
   if (MatchMangledName) {
-    if(Intr.getRecord()->getValueAsString("Name").ends_with("laneq"))
+    if (Intr.getRecord()->getValueAsString("Name").ends_with("laneq"))
       N += "q";
     MangledName = Intr.mangleName(N, ClassS);
   }
@@ -2170,9 +2180,30 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
+bool NeonEmitter::areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+                                           const ArrayRef<ImmCheck> ChecksB) {
+  // If multiple intrinsics map to the same builtin, we must ensure that the
+  // intended range checks performed in SemaArm.cpp do not contradict eachother,
+  // as these are emitted once per-buitlin.
+  //
+  // The arguments to be checked and type of each check to be performed must be
+  // the same. The element types may differ as they will be resolved
+  // per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes
+  // are not and so must be the same.
+  bool compat =
+      std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(),
+                 [](const auto A, const auto B) {
+                   return A.getImmArgIdx() == B.getImmArgIdx() &&
+                          A.getKind() == B.getKind() &&
+                          A.getVecSizeInBits() == B.getVecSizeInBits();
+                 });
+
+  return compat;
+}
+
 void NeonEmitter::genIntrinsicRangeCheckCode(
     raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
-  std::map<std::string, ArrayRef<ImmCheck>> Emitted;
+  std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
 
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   for (auto &Def : Defs) {
@@ -2185,8 +2216,9 @@ void NeonEmitter::genIntrinsicRangeCheckCode(
 
     const auto it = Emitted.find(Def->getMangledName());
     if (it != Emitted.end()) {
-      assert(it->second.equals(Checks) &&
-             "Neon builtin's immediate range checks cannot be redefined.");
+      assert(areCompatableRangeChecks(Checks, it->second) &&
+             "Neon intrinsics with incompatable immediate range checks cannot "
+             "share a builtin.");
       continue; // Ensure this is emitted only once
     }
 
diff --git a/llvm/include/llvm/TableGen/AArch64ImmCheck.h b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
index 16d88142d038ad..0242620c9c60ea 100644
--- a/llvm/include/llvm/TableGen/AArch64ImmCheck.h
+++ b/llvm/include/llvm/TableGen/AArch64ImmCheck.h
@@ -19,22 +19,16 @@ class ImmCheck {
   unsigned Kind;
   unsigned ElementSizeInBits;
   unsigned VecSizeInBits;
-  int TypeArgIdx;
 
 public:
   ImmCheck(int ImmArgIdx, unsigned Kind, unsigned ElementSizeInBits = 0,
-           unsigned VecSizeInBits = 128, int TypeArgIdx = -1)
+           unsigned VecSizeInBits = 128)
       : ImmArgIdx(ImmArgIdx), Kind(Kind), ElementSizeInBits(ElementSizeInBits),
-        VecSizeInBits(VecSizeInBits), TypeArgIdx(TypeArgIdx) {}
+        VecSizeInBits(VecSizeInBits) {}
   ImmCheck(const ImmCheck &Other) = default;
   ~ImmCheck() = default;
 
-  bool operator==(const ImmCheck &other) const {
-    return other.getImmArgIdx() == ImmArgIdx && other.getKind() == Kind &&
-           other.getTypeArgIdx() == TypeArgIdx;
-  }
   int getImmArgIdx() const { return ImmArgIdx; }
-  int getTypeArgIdx() const { return TypeArgIdx; }
   unsigned getKind() const { return Kind; }
   unsigned getElementSizeInBits() const { return ElementSizeInBits; }
   unsigned getVecSizeInBits() const { return VecSizeInBits; }

>From 2320f6232fa980a808545e882c8b6a4e2990c39d Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 28 Aug 2024 11:14:40 +0000
Subject: [PATCH 13/17] [Fixup] Improve comments and remove trailing whitespace

---
 clang/include/clang/Basic/TargetBuiltins.h    |   2 +-
 .../include/clang/Basic/arm_immcheck_incl.td  |   6 +-
 clang/include/clang/Basic/arm_neon.td         | 160 +++++++++---------
 clang/utils/TableGen/NeonEmitter.cpp          |   6 +-
 4 files changed, 88 insertions(+), 86 deletions(-)

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index a85e070cad0383..02b4a4b39bbf43 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -215,7 +215,7 @@ namespace clang {
       return ET == Poly8 || ET == Poly16 || ET == Poly64;
     }
     bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
-    bool isQuad() const { return (Flags & QuadFlag) != 0; };
+    bool isQuad() const { return (Flags & QuadFlag) != 0; }
     unsigned getEltSizeInBits() const {
       switch (getEltType()) {
       case Int8:
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
index c6a7ca26855496..9d7f74a35aaa87 100644
--- a/clang/include/clang/Basic/arm_immcheck_incl.td
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -32,8 +32,12 @@ def ImmCheck1_64                : ImmCheckType<25>; // 1..64
 def ImmCheck0_63                : ImmCheckType<26>; // 0..63
 
 class ImmCheck<int immArgIdx, ImmCheckType kind, int typeArgIdx = -1> {
+  // Parameter index of immediate argument to be verified
   int ImmArgIdx = immArgIdx;
-  // The index of the argument whose type should be referred to when validating this immedaite.
+
+  // Parameter index of argument whose type determines the context of this immediate check -
+  // element type for SVE/SME, element type and vector size for NEON (ignoring element type for
+  // ClassB NEON intrinsics).
   int TypeContextArgIdx = typeArgIdx;
   ImmCheckType Kind = kind;
 }
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 09d9b88637abd6..351a3a54d596e2 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -284,10 +284,10 @@ def OP_CVT_F32_BF16
 
 // Splat operation - performs a range-checked splat over a vector
 def SPLAT  : WInst<"splat_lane", ".(!q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
                     [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
-                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", 
+                   "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
                    [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 let TargetGuard = "bf16,neon" in {
@@ -404,19 +404,19 @@ def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
 let isShift = 1 in {
 
 
-def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VSHR_N     : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VSHL_N     : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
-def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VRSHR_N    : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VSRA_N     : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<2, ImmCheckShiftRight>]>;
-def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VRSRA_N    : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<2, ImmCheckShiftRight>]>;
-def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", 
+def VQSHL_N    : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
-def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl", 
+def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 
 // Narrowing right shifts should have an immediate range of 1..(sizeinbits(arg)/2).
@@ -442,16 +442,16 @@ def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl",
 // base type, so the range here is actually 0..(sizeinbits(arg)*2).
 // This cannot be rectified currently due to a use of vshll_n_s16 with an
 // out-of-bounds immediate in the defintiion of vcvt_f32_bf16.
-def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi", 
+def VSHLL_N    : SInst<"vshll_n", "(>Q).I", "csiUcUsUi",
                       [ImmCheck<1, ImmCheckShiftLeft>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.13 Shifts with insert
 def VSRI_N : WInst<"vsri_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
                     [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 def VSLI_N : WInst<"vsli_n", "...I",
-                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs", 
+                   "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
                    [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 }
 
@@ -552,14 +552,14 @@ def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh",
 // E.3.16 Extract lanes from a vector
 let InstName = "vmov" in
 def VGET_LANE : IInst<"vget_lane", "1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.17 Set lanes within a vector
 let InstName = "vmov" in
 def VSET_LANE : IInst<"vset_lane", ".1.I",
-                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", 
+                      "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
                       [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -609,9 +609,9 @@ let ArchGuard = "(__ARM_FP & 2)" in {
 def VCVT_S32     : SInst<"vcvt_s32", "S.",  "fQf">;
 def VCVT_U32     : SInst<"vcvt_u32", "U.",  "fQf">;
 def VCVT_F32     : SInst<"vcvt_f32", "F(.!)",  "iUiQiQUi">;
-def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf", 
+def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf",
                         [ImmCheck<1, ImmCheck1_32>]>;
-def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf", 
+def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf",
                         [ImmCheck<1, ImmCheck1_32>]>;
 def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi", 
                         [ImmCheck<1, ImmCheck1_32>]>;
@@ -682,7 +682,7 @@ def VQDMLSL_N     : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.26 Vector Extract
 def VEXT : WInst<"vext", "...I",
-                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf", 
+                 "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf",
                  [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -792,21 +792,21 @@ def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
 def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
 def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
 
-def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl", 
+def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl",
                     [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
-def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
-def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl", 
+def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl",
                     [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl", 
+def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
                     [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
 def LD1_DUP  : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">;
@@ -987,9 +987,9 @@ def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
 def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl", 
                         [ImmCheck<1, ImmCheck1_64>]>;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd", 
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd",
                         [ImmCheck<1, ImmCheck1_64>]>;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd", 
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd",
                         [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1028,9 +1028,9 @@ let TargetGuard = "aes,neon" in {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
-def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl", 
+def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl",
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl", 
+def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl",
                       [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
                         "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
@@ -1110,9 +1110,9 @@ def VQDMULL_HIGH_LANE   : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si",
                                   OP_QDMULLHi_LN>;
 def VQDMULL_HIGH_LANEQ  : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
                                   OP_QDMULLHi_LN>;
-def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi", 
+def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi", 
+def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
@@ -1343,67 +1343,67 @@ def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
 // Scalar Shift (Immediate)
 let isScalarShift = 1 in {
 // Signed/Unsigned Shift Right (Immediate)
-def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl", 
+def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl",
                         [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right (Immediate)
-def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl", 
+def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl",
                           [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 
 // Signed/Unsigned Shift Right and Accumulate (Immediate)
-def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl", 
+def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
-def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl", 
+def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 
 // Shift Left (Immediate)
-def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl", 
+def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl",
                       [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed/Unsigned Saturating Shift Left (Immediate)
-def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl", 
+def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl",
                       [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 // Signed Saturating Shift Left Unsigned (Immediate)
-def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl", 
+def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl",
                       [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
 
 // Shift Right And Insert (Immediate)
-def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl", 
+def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftRight, 0>]>;
 // Shift Left And Insert (Immediate)
-def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl", 
+def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl",
                         [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
 
 let isScalarNarrowShift = 1 in {
   // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
-  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
-  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl", 
+  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl", 
+  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
   // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
-  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl", 
+  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl",
                             [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate)
-def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi", 
+def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi",
                               [ImmCheck<1, ImmCheck1_32>]>;
-def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl", 
+def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl",
                               [ImmCheck<1, ImmCheck1_64>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate)
-def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf", 
+def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf",
                                 [ImmCheck<1, ImmCheck1_32>]>;
-def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf", 
+def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf",
                                 [ImmCheck<1, ImmCheck1_32>]>;
-def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd", 
+def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd",
                                 [ImmCheck<1, ImmCheck1_64>]>;
-def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd", 
+def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd",
                                 [ImmCheck<1, ImmCheck1_64>]>;
 }
 
@@ -1606,11 +1606,11 @@ def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_L
 def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
 
 // VMUL_LANE_A64 d type implemented using scalar mul lane
-def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d", 
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d",
                             [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 // VMUL_LANEQ d type implemented using scalar mul lane
-def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d", 
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d",
                               [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 // VMULX_LANE d type implemented using scalar vmulx_lane
 def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
@@ -1618,9 +1618,9 @@ def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
 // VMULX_LANEQ d type implemented using scalar vmulx_laneq
 def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>;
 // Scalar Floating Point fused multiply-add (scalar, by element)
-def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd", 
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd",
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd", 
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd",
                             [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Scalar Floating Point fused multiply-subtract (scalar, by element)
@@ -1632,15 +1632,15 @@ def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_Q
 def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>;
 
 // Signed Saturating Doubling Multiply-Add Long (scalar by element)
-def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi", 
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi",
                                 [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi", 
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi",
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
 // Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
-def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi", 
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi",
                               [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi", 
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi",
                               [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 // Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
 def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>;
@@ -1659,9 +1659,9 @@ def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_Q
 def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
 } // TargetGuard = "v8.1a"
 
-def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", 
+def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
                             [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
@@ -1815,17 +1815,17 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   // ARMv8.2-A FP16 lane vector intrinsics.
 
   // FMA lane
-  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh", 
+  def VFMA_LANEH   : IInst<"vfma_lane", "...qI", "hQh",
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh", 
+  def VFMA_LANEQH  : IInst<"vfma_laneq", "...QI", "hQh",
                           [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMA lane with scalar argument
   def FMLA_NH      : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
   // Scalar floating point fused multiply-add (scalar, by element)
-  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh", 
+  def SCALAR_FMLA_LANEH  : IInst<"vfma_lane", "111.I", "Sh",
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh", 
+  def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh",
                                 [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 
   // FMS lane
@@ -1847,9 +1847,9 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>;
   def VMULX_NH      : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
   // Scalar floating point  mulx (scalar, by element)
-  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh", 
+  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh",
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh", 
+  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh",
                                 [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
   // ARMv8.2-A FP16 reduction vector intrinsics.
   def VMAXVH   : SInst<"vmaxv", "1.", "hQh">;
@@ -1940,18 +1940,16 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
     // vcmla{ROT}_lane
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed lanety , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
-
     // vcmlaq{ROT}_lane
     def : SOpInst<"vcmla" # ROT # "_lane", "...qI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
 
-    // vcmla_{ROT}_laneq
+    // vcmla{ROT}_laneq
     def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type,  Op<(call "vcmla" # ROT, $p0, $p1,
             (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
     // vcmlaq{ROT}_laneq
     def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
             (bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-
   }
 }
 
@@ -1995,13 +1993,13 @@ let TargetGuard = "bf16,neon" in {
   def VGET_HIGH_BF : NoTestOpInst<"vget_high", ".Q", "b", OP_HI>;
   def VGET_LOW_BF  : NoTestOpInst<"vget_low", ".Q", "b", OP_LO>;
 
-  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb", 
+  def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb",
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb", 
+  def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb", 
+  def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb",
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb", 
+  def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb",
                           [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
   def VLD1_BF : WInst<"vld1", ".(c*!)", "bQb">;
@@ -2022,21 +2020,21 @@ let TargetGuard = "bf16,neon" in {
   def VST1_X3_BF : WInst<"vst1_x3", "v*(3!)", "bQb">;
   def VST1_X4_BF : WInst<"vst1_x4", "v*(4!)", "bQb">;
 
-  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb", 
+  def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb", 
+  def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb",
                           [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb", 
+  def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb",
                           [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
-  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb", 
+  def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb",
                           [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
-  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb", 
+  def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb", 
+  def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb",
                           [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
   def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb",
                           [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb", 
+  def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb",
                           [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 
   def VLD1_DUP_BF : WInst<"vld1_dup", ".(c*!)", "bQb">;
@@ -2089,7 +2087,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3,neon" in {
   def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl",
                         [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl", 
+  def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl",
                         [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 34a3739001ee71..d628a88da56ea3 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -462,7 +462,6 @@ class Intrinsic {
 
   // Return if the supplied argument is an immediate
   bool isArgImmediate(unsigned idx) const {
-    assert((idx + 1) < Types.size() && "Argument type index out of range!");
     return Types[idx + 1].isImmediate();
   }
 
@@ -2180,8 +2179,9 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   OS << "#endif\n\n";
 }
 
-bool NeonEmitter::areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
-                                           const ArrayRef<ImmCheck> ChecksB) {
+inline bool
+NeonEmitter::areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+                                      const ArrayRef<ImmCheck> ChecksB) {
   // If multiple intrinsics map to the same builtin, we must ensure that the
   // intended range checks performed in SemaArm.cpp do not contradict eachother,
   // as these are emitted once per-buitlin.

>From 2c571f4c5731299ee482bc1ba4fefe3be4ee0f56 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 29 Aug 2024 16:34:46 +0000
Subject: [PATCH 14/17] Unify NEON immediate range checking tests

- Changes
	- Move existing tests to a new dedicated directory
	- Fix NeonEmitter typos
	- Change vector store test prefix to 'test_vector_store'
	- Remove trailing whitespace in arm_neon.td
---
 clang/include/clang/Basic/arm_neon.td         |   4 +-
 clang/test/CodeGen/arm-neon-range-checks.c    | 426 ------------------
 clang/test/Sema/aarch64-neon-bf16-ranges.c    |  49 --
 clang/test/Sema/aarch64-neon-fp16-ranges.c    |  66 ---
 .../aarch64-neon-immediate-ranges/bfloat16.c  |  37 +-
 .../aarch64-neon-immediate-ranges/dotprod.c   |  23 +-
 .../extract-vector-from-vectors.c             |  40 +-
 .../fp16-scalar.c                             |  63 ++-
 .../fused-multiply-accumulate.c               |  17 +-
 .../saturating-multiply-accumulate.c          |  51 ++-
 .../saturating-multiply-by-scalar-and-widen.c |  67 ++-
 .../set-lanes-to-value.c                      |  24 +-
 .../sqrdmlah-ranges.c                         |  46 +-
 .../vector-load.c                             | 179 +++++++-
 .../vector-multiply-accumulate-by-scalar.c    |  47 +-
 .../vector-multiply-by-scalar-and-widen.c     |  24 +-
 .../vector-multiply-by-scalar.c               |  10 +-
 .../vector-multiply-subtract-by-scalar.c      |  46 +-
 .../vector-store.c                            | 242 ++++++++--
 clang/test/Sema/aarch64-neon-ranges.c         | 220 ---------
 clang/utils/TableGen/NeonEmitter.cpp          |  10 +-
 21 files changed, 840 insertions(+), 851 deletions(-)
 delete mode 100644 clang/test/CodeGen/arm-neon-range-checks.c
 delete mode 100644 clang/test/Sema/aarch64-neon-bf16-ranges.c
 delete mode 100644 clang/test/Sema/aarch64-neon-fp16-ranges.c
 delete mode 100644 clang/test/Sema/aarch64-neon-ranges.c

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 351a3a54d596e2..3760113fcbe494 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -291,9 +291,9 @@ def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
                    [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 
 let TargetGuard = "bf16,neon" in {
-  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb", 
+  def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb",
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", 
+  def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb",
                       [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
diff --git a/clang/test/CodeGen/arm-neon-range-checks.c b/clang/test/CodeGen/arm-neon-range-checks.c
deleted file mode 100644
index 360ff6be16654e..00000000000000
--- a/clang/test/CodeGen/arm-neon-range-checks.c
+++ /dev/null
@@ -1,426 +0,0 @@
-// RUN: %clang_cc1 -triple arm64-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
-// RUN: %clang_cc1 -triple armv8.1a-none-eabi -target-feature +neon -target-feature +dotprod -target-feature +v8.1a -verify %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-
-void test_vdot_lane(int32x2_t r, int8x8_t a, int8x8_t b) {
-  vdot_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdot_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdot_lane_s32(r, a, b, 0);
-  vdot_lane_s32(r, a, b, 1);
-}
-
-void test_vdotq_lane(int32x4_t r, int8x16_t a, int8x8_t b) {
-  vdotq_lane_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdotq_lane_s32(r, a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdotq_lane_s32(r, a, b, 0);
-  vdotq_lane_s32(r, a, b, 1);
-}
-
-#if defined(__aarch64__)
-void test_vdot_laneq(int32x2_t r, int8x8_t a, int8x16_t b) {
-  vdot_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdot_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdot_laneq_s32(r, a, b, 0);
-  vdot_laneq_s32(r, a, b, 3);
-}
-
-void test_vdotq_laneq(int32x4_t r, int8x16_t a, int8x16_t b) {
-  vdotq_laneq_s32(r, a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdotq_laneq_s32(r, a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdotq_laneq_s32(r, a, b, 0);
-  vdotq_laneq_s32(r, a, b, 3);
-}
-#endif
-
-void test_vdup_lane(int32x2_t v) {
-  vdup_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdup_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdup_lane_s32(v, 0);
-  vdup_lane_s32(v, 1);
-}
-
-void test_vdupq_lane(int32x2_t v) {
-  vdupq_lane_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vdupq_lane_s32(v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vdupq_lane_s32(v, 0);
-  vdupq_lane_s32(v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vdup_laneq(int32x4_t v) {
-  vdup_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdup_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdup_laneq_s32(v, 0);
-  vdup_laneq_s32(v, 3);
-}
-
-void test_vdupq_laneq(int32x4_t v) {
-  vdupq_laneq_s32(v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vdupq_laneq_s32(v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vdupq_laneq_s32(v, 0);
-  vdupq_laneq_s32(v, 3);
-}
-#endif
-
-void test_vmla_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vmla_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmla_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmla_lane_s32(a, b, v, 0);
-  vmla_lane_s32(a, b, v, 1);
-}
-
-void test_vmlaq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vmlaq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlaq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlaq_lane_s32(a, b, v, 0);
-  vmlaq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmla_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vmla_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmla_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmla_laneq_s32(a, b, v, 0);
-  vmla_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlaq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vmlaq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlaq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlaq_laneq_s32(a, b, v, 0);
-  vmlaq_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlal_high_lane_s32(a, b, v, 0);
-  vmlal_high_lane_s32(a, b, v, 1);
-}
-
-void test_vmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlal_high_laneq_s32(a, b, v, 0);
-  vmlal_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlal_lane_s32(a, b, v, 0);
-  vmlal_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlal_laneq_s32(a, b, v, 0);
-  vmlal_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmls_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vmls_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmls_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmls_lane_s32(a, b, v, 0);
-  vmls_lane_s32(a, b, v, 1);
-}
-
-void test_vmlsq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vmlsq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlsq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlsq_lane_s32(a, b, v, 0);
-  vmlsq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmls_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vmls_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmls_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmls_laneq_s32(a, b, v, 0);
-  vmls_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlsq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vmlsq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlsq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlsq_laneq_s32(a, b, v, 0);
-  vmlsq_laneq_s32(a, b, v, 3);
-}
-
-void test_vmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlsl_high_lane_s32(a, b, v, 0);
-  vmlsl_high_lane_s32(a, b, v, 1);
-}
-
-void test_vmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlsl_high_laneq_s32(a, b, v, 0);
-  vmlsl_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmlsl_lane_s32(a, b, v, 0);
-  vmlsl_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmlsl_laneq_s32(a, b, v, 0);
-  vmlsl_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vmull_lane(int32x2_t a, int32x2_t b) {
-  vmull_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmull_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmull_lane_s32(a, b, 0);
-  vmull_lane_s32(a, b, 1);
-}
-
-#if defined(__aarch64__)
-void test_vmull_laneq(int32x2_t a, int32x4_t b) {
-  vmull_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmull_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmull_laneq_s32(a, b, 0);
-  vmull_laneq_s32(a, b, 3);
-}
-
-void test_vmull_high_lane(int32x4_t a, int32x2_t b) {
-  vmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vmull_high_lane_s32(a, b, 0);
-  vmull_high_lane_s32(a, b, 1);
-}
-
-void test_vmull_high_laneq(int32x4_t a, int32x4_t b) {
-  vmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vmull_high_laneq_s32(a, b, 0);
-  vmull_high_laneq_s32(a, b, 3);
-}
-
-void test_vqdmlal_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vqdmlal_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlal_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlal_high_lane_s32(a, b, v, 0);
-  vqdmlal_high_lane_s32(a, b, v, 1);
-}
-
-void test_vqdmlal_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vqdmlal_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlal_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlal_high_laneq_s32(a, b, v, 0);
-  vqdmlal_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqdmlal_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vqdmlal_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlal_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlal_lane_s32(a, b, v, 0);
-  vqdmlal_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmlal_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vqdmlal_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlal_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlal_laneq_s32(a, b, v, 0);
-  vqdmlal_laneq_s32(a, b, v, 3);
-}
-
-void test_vqdmlsl_high_lane(int64x2_t a, int32x4_t b, int32x2_t v) {
-  vqdmlsl_high_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlsl_high_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlsl_high_lane_s32(a, b, v, 0);
-  vqdmlsl_high_lane_s32(a, b, v, 1);
-}
-
-void test_vqdmlsl_high_laneq(int64x2_t a, int32x4_t b, int32x4_t v) {
-  vqdmlsl_high_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlsl_high_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlsl_high_laneq_s32(a, b, v, 0);
-  vqdmlsl_high_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqdmlsl_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
-  vqdmlsl_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmlsl_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmlsl_lane_s32(a, b, v, 0);
-  vqdmlsl_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmlsl_laneq(int64x2_t a, int32x2_t b, int32x4_t v) {
-  vqdmlsl_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmlsl_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmlsl_laneq_s32(a, b, v, 0);
-  vqdmlsl_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqdmulh_lane(int32x2_t a, int32x2_t b) {
-  vqdmulh_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmulh_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmulh_lane_s32(a, b, 0);
-  vqdmulh_lane_s32(a, b, 1);
-}
-
-void test_vqdmulhq_lane(int32x4_t a, int32x2_t b) {
-  vqdmulhq_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmulhq_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmulhq_lane_s32(a, b, 0);
-  vqdmulhq_lane_s32(a, b, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmulh_laneq(int32x2_t a, int32x4_t b) {
-  vqdmulh_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmulh_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmulh_laneq_s32(a, b, 0);
-  vqdmulh_laneq_s32(a, b, 3);
-}
-
-void test_vqdmulhq_laneq(int32x4_t a, int32x4_t b) {
-  vqdmulhq_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmulhq_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmulhq_laneq_s32(a, b, 0);
-  vqdmulhq_laneq_s32(a, b, 3);
-}
-
-void test_vqdmull_high_lane(int32x4_t a, int32x2_t b) {
-  vqdmull_high_lane_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmull_high_lane_s32(a, b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmull_high_lane_s32(a, b, 0);
-  vqdmull_high_lane_s32(a, b, 1);
-}
-
-void test_vqdmull_high_laneq(int32x4_t a, int32x4_t b) {
-  vqdmull_high_laneq_s32(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmull_high_laneq_s32(a, b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmull_high_laneq_s32(a, b, 0);
-  vqdmull_high_laneq_s32(a, b, 3);
-}
-#endif
-
-void test_vqdmull_lane(int32x2_t a, int32x2_t v) {
-  vqdmull_lane_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqdmull_lane_s32(a, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqdmull_lane_s32(a, v, 0);
-  vqdmull_lane_s32(a, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqdmull_laneq(int32x2_t a, int32x4_t v) {
-  vqdmull_laneq_s32(a, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqdmull_laneq_s32(a, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqdmull_laneq_s32(a, v, 0);
-  vqdmull_laneq_s32(a, v, 3);
-}
-#endif
-
-void test_vqrdmlah_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vqrdmlah_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlah_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlah_lane_s32(a, b, v, 0);
-  vqrdmlah_lane_s32(a, b, v, 1);
-}
-
-void test_vqrdmlahq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vqrdmlahq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlahq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlahq_lane_s32(a, b, v, 0);
-  vqrdmlahq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqrdmlah_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vqrdmlah_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlah_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlah_laneq_s32(a, b, v, 0);
-  vqrdmlah_laneq_s32(a, b, v, 3);
-}
-
-void test_vqrdmlahq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vqrdmlahq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlahq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlahq_laneq_s32(a, b, v, 0);
-  vqrdmlahq_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqrdmlsh_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
-  vqrdmlsh_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlsh_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlsh_lane_s32(a, b, v, 0);
-  vqrdmlsh_lane_s32(a, b, v, 1);
-}
-
-void test_vqrdmlshq_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
-  vqrdmlshq_lane_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmlshq_lane_s32(a, b, v, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmlshq_lane_s32(a, b, v, 0);
-  vqrdmlshq_lane_s32(a, b, v, 1);
-}
-
-#if defined(__aarch64__)
-void test_vqrdmlsh_laneq(int32x2_t a, int32x2_t b, int32x4_t v) {
-  vqrdmlsh_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlsh_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlsh_laneq_s32(a, b, v, 0);
-  vqrdmlsh_laneq_s32(a, b, v, 3);
-}
-
-void test_vqrdmlshq_laneq(int32x4_t a, int32x4_t b, int32x4_t v) {
-  vqrdmlshq_laneq_s32(a, b, v, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmlshq_laneq_s32(a, b, v, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmlshq_laneq_s32(a, b, v, 0);
-  vqrdmlshq_laneq_s32(a, b, v, 3);
-}
-#endif
-
-void test_vqrdmulh_lane(int32x2_t a, int32x2_t v) {
-  vqrdmulh_lane_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmulh_lane_s32(a, v,  2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmulh_lane_s32(a, v,  0);
-  vqrdmulh_lane_s32(a, v,  1);
-}
-
-void test_vqrdmulhq_lane(int32x4_t a, int32x2_t v) {
-  vqrdmulhq_lane_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
-  vqrdmulhq_lane_s32(a, v,  2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
-  vqrdmulhq_lane_s32(a, v,  0);
-  vqrdmulhq_lane_s32(a, v,  1);
-}
-
-#if defined(__aarch64__)
-void test_vqrdmulh_laneq(int32x2_t a, int32x4_t v) {
-  vqrdmulh_laneq_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmulh_laneq_s32(a, v,  4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmulh_laneq_s32(a, v,  0);
-  vqrdmulh_laneq_s32(a, v,  3);
-}
-
-void test_vqrdmulhq_laneq(int32x4_t a, int32x4_t v) {
-  vqrdmulhq_laneq_s32(a, v,  -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  vqrdmulhq_laneq_s32(a, v,  4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  vqrdmulhq_laneq_s32(a, v,  0);
-  vqrdmulhq_laneq_s32(a, v,  3);
-}
-#endif
diff --git a/clang/test/Sema/aarch64-neon-bf16-ranges.c b/clang/test/Sema/aarch64-neon-bf16-ranges.c
deleted file mode 100644
index 88e6c50c593820..00000000000000
--- a/clang/test/Sema/aarch64-neon-bf16-ranges.c
+++ /dev/null
@@ -1,49 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -verify \
-// RUN: -triple aarch64 -target-feature +neon \
-// RUN: -target-feature +bf16 %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-
-int x;
-
-void test_vcopy_lane_bf16(bfloat16x4_t a, bfloat16x8_t b) {
-  // 0 <= lane1 <= 3; 0 <= lane2 <= 3
-  (void)vcopy_lane_bf16(a, 3, a, 3);
-  (void)vcopy_lane_bf16(a, 0, a, 4);    // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, 1, a, -1);   // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, 4, a, 0);    // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, -1, a, 1);   // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopy_lane_bf16(a, 0, a, x);    // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopy_lane_bf16(a, x, a, 0);    // expected-error-re {{argument {{.*}} must be a constant integer}}
-
-  // 0 <= lane1 <= 7; 0 <= lane2 <= 3
-  (void)vcopyq_lane_bf16(b, 7, a, 3);
-  (void)vcopyq_lane_bf16(b, 0, a, 4);   // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopyq_lane_bf16(b, 1, a, -1);  // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopyq_lane_bf16(b, 8, a, 0);   // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopyq_lane_bf16(b, -1, a, 1);  // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopyq_lane_bf16(b, 0, a, x);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopyq_lane_bf16(b, x, a, 0);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-
-  // 0 <= lane1 <= 3; 0 <= lane2 <= 7
-  (void)vcopy_laneq_bf16(a, 3, b, 7);
-  (void)vcopy_laneq_bf16(a, 0, b, 8);   // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopy_laneq_bf16(a, 1, b, -1);  // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopy_laneq_bf16(a, 4, b, 0);   // expected-error {{argument value 4 is outside the valid range [0, 3]}}
-  (void)vcopy_laneq_bf16(a, -1, b, 1);  // expected-error {{argument value -1 is outside the valid range [0, 3]}}
-  (void)vcopy_laneq_bf16(a, 0, b, x);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopy_laneq_bf16(a, x, b, 0);   // expected-error-re {{argument {{.*}} must be a constant integer}}
-
-
-  // 0 <= lane1 <= 7; 0 <= lane2 <= 7
-  (void)vcopyq_laneq_bf16(b, 7, b, 7);
-  (void)vcopyq_laneq_bf16(b, 0, b, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, 1, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, 8, b, 0);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, -1, b, 1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
-  (void)vcopyq_laneq_bf16(b, 0, b, x);  // expected-error-re {{argument {{.*}} must be a constant integer}}
-  (void)vcopyq_laneq_bf16(b, x, b, 0);  // expected-error-re {{argument {{.*}} must be a constant integer}}
-}
-
diff --git a/clang/test/Sema/aarch64-neon-fp16-ranges.c b/clang/test/Sema/aarch64-neon-fp16-ranges.c
deleted file mode 100644
index cb273eb56160b8..00000000000000
--- a/clang/test/Sema/aarch64-neon-fp16-ranges.c
+++ /dev/null
@@ -1,66 +0,0 @@
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-feature +fullfp16 -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +fullfp16 -target-feature +neon -ffreestanding -fsyntax-only -verify %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-#include <arm_fp16.h>
-
-void test_vcvt_f16_16(int16_t a){
-  vcvth_n_f16_s16(a, 1);
-  vcvth_n_f16_s16(a, 16);
-  vcvth_n_f16_s16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_s16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_f16_u16(a, 1);
-  vcvth_n_f16_u16(a, 16);
-  vcvth_n_f16_u16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_u16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_vcvt_f16_32(int32_t a){
-  vcvth_n_f16_u32(a, 1);
-  vcvth_n_f16_u32(a, 16);
-  vcvth_n_f16_u32(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_u32(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_f16_s32(a, 1);
-  vcvth_n_f16_s32(a, 16);
-  vcvth_n_f16_s32(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_s32(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_vcvt_f16_64(int64_t a){
-  vcvth_n_f16_s64(a, 1);
-  vcvth_n_f16_s64(a, 16);
-  vcvth_n_f16_s64(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_f16_s64(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-
-void test_vcvt_su_f(float16_t a){
-  vcvth_n_s16_f16(a, 1);
-  vcvth_n_s16_f16(a, 16);
-  vcvth_n_s16_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_s16_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_s32_f16(a, 1);
-  vcvth_n_s32_f16(a, 16);
-  vcvth_n_s32_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_s32_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_s64_f16(a, 1);
-  vcvth_n_s64_f16(a, 16);
-  vcvth_n_s64_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_s64_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_u16_f16(a, 1);
-  vcvth_n_u16_f16(a, 16);
-  vcvth_n_u16_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_u16_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vcvth_n_u32_f16(a, 1);
-  vcvth_n_u32_f16(a, 16);
-  vcvth_n_u32_f16(a, 0);  // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vcvth_n_u32_f16(a, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c b/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
index bde8efc260f172..485219a9f89787 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/bfloat16.c
@@ -4,8 +4,6 @@
 #include <arm_bf16.h>
 // REQUIRES: aarch64-registered-target
 
-// clang/test/Sema/aarch64-neon-bf16-ranges.c includes tests for:
-// vcopy_lane_bf16, vcopyq_lane_bf16, vcopy_laneq_bf16, vcopyq_laneq_bf16
 
 void test_set_all_lanes_to_the_same_value_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
 	vdup_lane_bf16(arg_b16x4, 0);
@@ -66,6 +64,41 @@ void test_set_vector_lane_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4, b
 
 }
 
+void test_copy_vector_lane_bf16(bfloat16x8_t arg_b16x8, bfloat16x4_t arg_b16x4) {
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, 0);
+	vcopy_lane_bf16(arg_b16x4, 3, arg_b16x4, 0);
+	vcopy_lane_bf16(arg_b16x4, -1, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_bf16(arg_b16x4, 4, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, 3);
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_lane_bf16(arg_b16x4, 0, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, 0);
+	vcopyq_lane_bf16(arg_b16x8, 7, arg_b16x4, 0);
+	vcopyq_lane_bf16(arg_b16x8, -1, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_bf16(arg_b16x8, 8, arg_b16x4, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, 3);
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_lane_bf16(arg_b16x8, 0, arg_b16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, 0);
+	vcopy_laneq_bf16(arg_b16x4, 3, arg_b16x8, 0);
+	vcopy_laneq_bf16(arg_b16x4, -1, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_bf16(arg_b16x4, 4, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, 7);
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopy_laneq_bf16(arg_b16x4, 0, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, 0);
+	vcopyq_laneq_bf16(arg_b16x8, 7, arg_b16x8, 0);
+	vcopyq_laneq_bf16(arg_b16x8, -1, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_bf16(arg_b16x8, 8, arg_b16x8, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, 7);
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcopyq_laneq_bf16(arg_b16x8, 0, arg_b16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
 void test_load_bf16(bfloat16x4_t arg_b16x4, bfloat16x4x4_t arg_b16x4x4, bfloat16x8x4_t arg_b16x8x4,
 					bfloat16x4x3_t arg_b16x4x3, bfloat16x8_t arg_b16x8, bfloat16_t* arg_b16_ptr,
 					bfloat16x8x3_t arg_b16x8x3, bfloat16x4x2_t arg_b16x4x2, bfloat16x8x2_t arg_b16x8x2) {
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c b/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
index 553ee096e490c7..11f2c660a8ff24 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/dotprod.c
@@ -3,7 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// s32 variant is tested under clang/test/CodeGen/arm-neon-range-checks
 void test_dot_product_u32(uint8x8_t arg_u8x8, uint32x2_t arg_u32x2, uint8x16_t arg_u8x16, uint32x4_t arg_u32x4) {
 	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 0);
 	vdot_lane_u32(arg_u32x2, arg_u8x8, arg_u8x8, 1);
@@ -27,3 +26,25 @@ void test_dot_product_u32(uint8x8_t arg_u8x8, uint32x2_t arg_u32x2, uint8x16_t a
 
 }
 
+void test_dot_product_s32(int32x2_t arg_i32x2, int8x16_t arg_i8x16, int8x8_t arg_i8x8, int32x4_t arg_i32x4) {
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, 0);
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, 1);
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_lane_s32(arg_i32x2, arg_i8x8, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, 0);
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, 3);
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_laneq_s32(arg_i32x4, arg_i8x16, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, 0);
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, 3);
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdot_laneq_s32(arg_i32x2, arg_i8x8, arg_i8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, 0);
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, 1);
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdotq_lane_s32(arg_i32x4, arg_i8x16, arg_i8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
index a17df47eb98eaf..0453e56401a651 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/extract-vector-from-vectors.c
@@ -3,8 +3,18 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vext_s8, vextq_s8, vext_u8, vextq_u8, vext_p8, vextq_p8 are tested under
-// clang/test/Sema/aarch64-neon-ranges.c
+void test_extract_vector_from_a_pair_of_vectors_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
+	vext_s8(arg_i8x8, arg_i8x8, 0);
+	vext_s8(arg_i8x8, arg_i8x8, 7);
+	vext_s8(arg_i8x8, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_s8(arg_i8x8, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_s8(arg_i8x16, arg_i8x16, 0);
+	vextq_s8(arg_i8x16, arg_i8x16, 15);
+	vextq_s8(arg_i8x16, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_s8(arg_i8x16, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
 
 void test_extract_vector_from_a_pair_of_vectors_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4) {
 	vext_s16(arg_i16x4, arg_i16x4, 0);
@@ -44,6 +54,19 @@ void test_extract_vector_from_a_pair_of_vectors_s64(int64x2_t arg_i64x2, int64x1
 
 }
 
+void test_extract_vector_from_a_pair_of_vectors_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vext_u8(arg_u8x8, arg_u8x8, 0);
+	vext_u8(arg_u8x8, arg_u8x8, 7);
+	vext_u8(arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_u8(arg_u8x8, arg_u8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_u8(arg_u8x16, arg_u8x16, 0);
+	vextq_u8(arg_u8x16, arg_u8x16, 15);
+	vextq_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_u8(arg_u8x16, arg_u8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
 void test_extract_vector_from_a_pair_of_vectors_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
 	vext_u16(arg_u16x4, arg_u16x4, 0);
 	vext_u16(arg_u16x4, arg_u16x4, 3);
@@ -119,6 +142,19 @@ void test_extract_vector_from_a_pair_of_vectors_f64(float64x2_t arg_f64x2, float
 
 }
 
+void test_extract_vector_from_a_pair_of_vectors_p8(poly8x8_t arg_p8x8, poly8x16_t arg_p8x16) {
+	vext_p8(arg_p8x8, arg_p8x8, 0);
+	vext_p8(arg_p8x8, arg_p8x8, 7);
+	vext_p8(arg_p8x8, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vext_p8(arg_p8x8, arg_p8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vextq_p8(arg_p8x16, arg_p8x16, 0);
+	vextq_p8(arg_p8x16, arg_p8x16, 15);
+	vextq_p8(arg_p8x16, arg_p8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vextq_p8(arg_p8x16, arg_p8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
 void test_extract_vector_from_a_pair_of_vectors_p16(poly16x8_t arg_p16x8, poly16x4_t arg_p16x4) {
 	vext_p16(arg_p16x4, arg_p16x4, 0);
 	vext_p16(arg_p16x4, arg_p16x4, 3);
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
index 6f83169498fb76..3a90b445f358d4 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fp16-scalar.c
@@ -4,9 +4,40 @@
 #include <arm_fp16.h>
 // REQUIRES: aarch64-registered-target
 
-// vcvth_n_f16_s16, vcvth_n_f16_s32, vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32
-// vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16, vcvth_n_u32_f16
-// are tested under clang/test/Sema/aarch64-neon-fp16-ranges.c
+void test_conversions_s16(int16_t arg_i16) {
+	vcvth_n_f16_s16(arg_i16, 1);
+	vcvth_n_f16_s16(arg_i16, 16);
+	vcvth_n_f16_s16(arg_i16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_s16(arg_i16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_s32(int32_t arg_i32) {
+	vcvth_n_f16_s32(arg_i32, 1);
+	vcvth_n_f16_s32(arg_i32, 16);
+	vcvth_n_f16_s32(arg_i32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_s32(arg_i32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_s64(int64_t arg_i64) {
+	vcvth_n_f16_s64(arg_i64, 1);
+	vcvth_n_f16_s64(arg_i64, 16);
+	vcvth_n_f16_s64(arg_i64, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_s64(arg_i64, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_u16(uint16_t arg_u16) {
+	vcvth_n_f16_u16(arg_u16, 1);
+	vcvth_n_f16_u16(arg_u16, 16);
+	vcvth_n_f16_u16(arg_u16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_u16(arg_u16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_conversions_u32(uint32_t arg_u32) {
+	vcvth_n_f16_u32(arg_u32, 1);
+	vcvth_n_f16_u32(arg_u32, 16);
+	vcvth_n_f16_u32(arg_u32, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_f16_u32(arg_u32, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
 
 void test_conversions_u64(uint64_t arg_u64) {
 	vcvth_n_f16_u64(arg_u64, 1);
@@ -16,9 +47,33 @@ void test_conversions_u64(uint64_t arg_u64) {
 }
 
 void test_conversions_f16(float16_t arg_f16) {
+	vcvth_n_s16_f16(arg_f16, 1);
+	vcvth_n_s16_f16(arg_f16, 16);
+	vcvth_n_s16_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_s16_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_s32_f16(arg_f16, 1);
+	vcvth_n_s32_f16(arg_f16, 16);
+	vcvth_n_s32_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_s32_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_s64_f16(arg_f16, 1);
+	vcvth_n_s64_f16(arg_f16, 16);
+	vcvth_n_s64_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_s64_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_u16_f16(arg_f16, 1);
+	vcvth_n_u16_f16(arg_f16, 16);
+	vcvth_n_u16_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_u16_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vcvth_n_u32_f16(arg_f16, 1);
+	vcvth_n_u32_f16(arg_f16, 16);
+	vcvth_n_u32_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vcvth_n_u32_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vcvth_n_u64_f16(arg_f16, 1);
 	vcvth_n_u64_f16(arg_f16, 16);
 	vcvth_n_u64_f16(arg_f16, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vcvth_n_u64_f16(arg_f16, 17); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 }
-
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c b/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
index 1f03ed2264ffc6..c65a2e6e65332a 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/fused-multiply-accumulate.c
@@ -3,8 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vfma_laneq_f64, vfma_lane_f64, vfmaq_lane_f64, vfmaq_laneq_f64,
-// are tested under aarch64-neon-ranges.c
 
 void test_fused_multiply_accumulate_f32(float32x2_t arg_f32x2, float32_t arg_f32, float32x4_t arg_f32x4) {
 	vfma_lane_f32(arg_f32x2, arg_f32x2, arg_f32x2, 0);
@@ -69,7 +67,15 @@ void test_fused_multiply_accumulate_f32(float32x2_t arg_f32x2, float32_t arg_f32
 
 }
 
-void test_fused_multiply_accumulate_f64(float64_t arg_f64, float64x2_t arg_f64x2, float64x1_t arg_f64x1) {
+void test_fused_multiply_accumulate_f64(float64x2_t arg_f64x2, float64_t arg_f64, float64x1_t arg_f64x1) {
+	vfma_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 0);
+	vfma_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfma_lane_f64(arg_f64x1, arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vfmaq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 0);
+	vfmaq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_lane_f64(arg_f64x2, arg_f64x2, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 0);
 	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vfmad_lane_f64(arg_f64, arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -79,6 +85,11 @@ void test_fused_multiply_accumulate_f64(float64_t arg_f64, float64x2_t arg_f64x2
 	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vfma_laneq_f64(arg_f64x1, arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 0);
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 1);
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vfmaq_laneq_f64(arg_f64x2, arg_f64x2, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 0);
 	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, 1);
 	vfmad_laneq_f64(arg_f64, arg_f64, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
index 4c3dfd0e65a08d..854d6171a914c5 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-accumulate.c
@@ -3,8 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vqdmlal_lane_s32, vqdmlal_high_lane_s32, vqdmlsl_high_lane_s32, vqdmlsl_laneq_s32,
-// vqdmlsls_laneq_s32, vqdmlsl_high_laneq_s32, are tested under arm-neon-range-checks.c.
 
 void test_saturating_multiply_accumulate_s16(int16x4_t arg_i16x4, int32_t arg_i32, int16_t arg_i16,
 											 int32x4_t arg_i32x4, int16x8_t arg_i16x8) {
@@ -70,22 +68,65 @@ void test_saturating_multiply_accumulate_s16(int16x4_t arg_i16x4, int32_t arg_i3
 
 }
 
-void test_saturating_multiply_accumulate_s32(int32x2_t arg_i32x2, int64x2_t arg_i64x2, int32_t arg_i32,
-											 int32x4_t arg_i32x4, int64_t arg_i64) {
+void test_saturating_multiply_accumulate_s32(int32_t arg_i32, int32x4_t arg_i32x4, int64_t arg_i64, int64x2_t arg_i64x2, int32x2_t arg_i32x2) {
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 0);
 	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
 	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqdmlals_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 0);
 	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 3);
 	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqdmlals_laneq_s32(arg_i64, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 0);
 	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 1);
 	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqdmlsls_lane_s32(arg_i64, arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
-}
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, 0);
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, 3);
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsls_laneq_s32(arg_i64, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
index a72d1950389dd5..662a3c2ed172d7 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/saturating-multiply-by-scalar-and-widen.c
@@ -3,10 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// clang/Sema/arm-neon-range-checks.c includes tests for:
-// vqdmull_lane_s32, vqdmull_laneq_s32,vqdmull_high_lane_s32, vqdmull_high_laneq_s32
-// vqdmulh_lane_s32, vqdmulhq_lane_s32, vqdmulh_laneq_s32, vqdmulhq_laneq_s32, vqrdmulh_lane_s32
-// vqrdmulhq_lane_s32, vqrdmulh_laneq_s32, vqrdmulhq_laneq_s32
 
 void test_saturating_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8, int16_t arg_i16) {
 	vqdmull_lane_s16(arg_i16x4, arg_i16x4, 0);
@@ -101,32 +97,93 @@ void test_saturating_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16
 
 }
 
-void test_saturating_multiply_by_scalar_and_widen_s32(int32x4_t arg_i32x4, int32_t arg_i32, int32x2_t arg_i32x2) {
+
+void test_saturating_multiply_by_scalar_and_widen_s32(int32x2_t arg_i32x2, int32_t arg_i32, int32x4_t arg_i32x4) {
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqdmulls_lane_s32(arg_i32, arg_i32x2, 0);
 	vqdmulls_lane_s32(arg_i32, arg_i32x2, 1);
 	vqdmulls_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqdmulls_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 0);
 	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 3);
 	vqdmulls_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqdmulls_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmull_high_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 0);
 	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
 	vqdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulh_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
 	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
 	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqdmulhs_laneq_s32(arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 0);
 	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 1);
 	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vqrdmulhs_lane_s32(arg_i32, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulh_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmulhq_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 0);
 	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, 3);
 	vqrdmulhs_laneq_s32(arg_i32, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c b/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
index ea1fcd5dc03221..b5fa76b5be882f 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/set-lanes-to-value.c
@@ -3,8 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vdup_lane_s32, vdupq_lane_s32, vdup_laneq_s32, vdupq_laneq_s3 are tested 
-// under clang/test/CodeGen/arm-neon-range-checks.c
 
 void test_set_all_lanes_to_the_same_value_s8(int8x8_t arg_i8x8, int8x16_t arg_i8x16) {
 	vdup_lane_s8(arg_i8x8, 0);
@@ -52,6 +50,28 @@ void test_set_all_lanes_to_the_same_value_s16(int16x4_t arg_i16x4, int16x8_t arg
 
 }
 
+void test_set_all_lanes_to_the_same_value_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2) {
+	vdup_lane_s32(arg_i32x2, 0);
+	vdup_lane_s32(arg_i32x2, 1);
+	vdup_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_lane_s32(arg_i32x2, 0);
+	vdupq_lane_s32(arg_i32x2, 1);
+	vdupq_lane_s32(arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_lane_s32(arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdup_laneq_s32(arg_i32x4, 0);
+	vdup_laneq_s32(arg_i32x4, 3);
+	vdup_laneq_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdup_laneq_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vdupq_laneq_s32(arg_i32x4, 0);
+	vdupq_laneq_s32(arg_i32x4, 3);
+	vdupq_laneq_s32(arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vdupq_laneq_s32(arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
 
 void test_set_all_lanes_to_the_same_value_s64(int64x1_t arg_i64x1, int64x2_t arg_i64x2) {
 	vdup_lane_s64(arg_i64x1, 0);
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c b/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
index 9da02914966295..2439fb79737e6b 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/sqrdmlah-ranges.c
@@ -3,9 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vqrdmlah_lane_s32, vqrdmlahq_lane_s32, vqrdmlah_laneq_s32, vqrdmlahq_laneq_s32, 
-// vqrdmlsh_lane_s32 are tested under clang/test/CodeGen/arm-neon-range-checks.c
-
 void test_saturating_multiply_accumulate_by_element_s16(int16x8_t arg_i16x8, int16_t arg_i16, int16x4_t arg_i16x4) {
 	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
 	vqrdmlah_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
@@ -69,7 +66,47 @@ void test_saturating_multiply_accumulate_by_element_s16(int16x8_t arg_i16x8, int
 
 }
 
-void test_saturating_multiply_accumulate_by_element_s32(int32x4_t arg_i32x4, int32_t arg_i32, int32x2_t arg_i32x2) {
+void test_saturating_multiply_accumulate_by_element_s32(int32x2_t arg_i32x2, int32x4_t arg_i32x4, int32_t arg_i32) {
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlah_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlahq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlsh_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vqrdmlshq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 0);
 	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, 1);
 	vqrdmlahs_lane_s32(arg_i32, arg_i32, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -91,4 +128,3 @@ void test_saturating_multiply_accumulate_by_element_s32(int32x4_t arg_i32x4, int
 	vqrdmlshs_laneq_s32(arg_i32, arg_i32, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
 }
-
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
index a11535acf0b4a7..3259d47e1b625a 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-load.c
@@ -4,13 +4,153 @@
 // REQUIRES: aarch64-registered-target
 
 
-// the majority of s8,s16,s32 and s64 variants are tested under 
-// clang/test/Sema/aarch64-neon-ranges.c
+void test_vector_load_s8(int8x8x2_t arg_i8x8x2, int8x8x3_t arg_i8x8x3, int8x16x2_t arg_i8x16x2,
+						 int8x16x3_t arg_i8x16x3, int8x8_t arg_i8x8, int8x16x4_t arg_i8x16x4,
+						 int8x16_t arg_i8x16, int8x8x4_t arg_i8x8x4, int8_t* arg_i8_ptr) {
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, 0);
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, 7);
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s8(arg_i8_ptr, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, 0);
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, 15);
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s8(arg_i8_ptr, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, 0);
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, 7);
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s8(arg_i8_ptr, arg_i8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 0);
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 15);
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, 0);
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, 7);
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s8(arg_i8_ptr, arg_i8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 0);
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 15);
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, 0);
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, 7);
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s8(arg_i8_ptr, arg_i8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 0);
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 15);
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_s16(int16x8x2_t arg_i16x8x2, int16x8x3_t arg_i16x8x3, int16x8x4_t arg_i16x8x4,
+						  int16_t* arg_i16_ptr, int16x4x2_t arg_i16x4x2, int16x4x3_t arg_i16x4x3,
+						  int16x8_t arg_i16x8, int16x4x4_t arg_i16x4x4, int16x4_t arg_i16x4) {
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, 0);
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, 3);
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s16(arg_i16_ptr, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, 0);
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, 7);
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s16(arg_i16_ptr, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, 0);
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, 3);
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s16(arg_i16_ptr, arg_i16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 0);
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 7);
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, 0);
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, 3);
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s16(arg_i16_ptr, arg_i16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 0);
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 7);
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, 0);
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, 3);
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s16(arg_i16_ptr, arg_i16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 0);
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 7);
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_s32(int32x2x4_t arg_i32x2x4, int32x4_t arg_i32x4, int32x2_t arg_i32x2,
+						  int32x4x2_t arg_i32x4x2, int32x4x4_t arg_i32x4x4, int32_t* arg_i32_ptr,
+						  int32x2x3_t arg_i32x2x3, int32x4x3_t arg_i32x4x3, int32x2x2_t arg_i32x2x2) {
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, 0);
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, 1);
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s32(arg_i32_ptr, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, 0);
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, 3);
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s32(arg_i32_ptr, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, 0);
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, 1);
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s32(arg_i32_ptr, arg_i32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 0);
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 3);
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, 0);
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, 1);
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s32(arg_i32_ptr, arg_i32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 0);
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 3);
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, 0);
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, 1);
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s32(arg_i32_ptr, arg_i32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 0);
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 3);
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_load_s64(int64x1x4_t arg_i64x1x4, int64x1_t arg_i64x1, int64x2x2_t arg_i64x2x2,
+						  int64x2x4_t arg_i64x2x4, int64x1x3_t arg_i64x1x3, int64x1x2_t arg_i64x1x2,
+						  int64x2_t arg_i64x2, int64x2x3_t arg_i64x2x3, int64_t* arg_i64_ptr) {
+	vld1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vld1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
-void test_vector_load_s64(int64x1x4_t arg_i64x1x4, int64x2_t arg_i64x2, int64_t *arg_i64_ptr,
-						  int64x2x3_t arg_i64x2x3, int64x1x2_t arg_i64x1x2, int64x2x2_t arg_i64x2x2,
-						  int64x2x4_t arg_i64x2x4, int64x1x3_t arg_i64x1x3, int64x1_t arg_i64x1) {
-	
 	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
 	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vldap1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -29,6 +169,33 @@ void test_vector_load_s64(int64x1x4_t arg_i64x1x4, int64x2_t arg_i64x2, int64_t
 	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vstl1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vld2_lane_s64(arg_i64_ptr, arg_i64x1x2, 0);
+	vld2_lane_s64(arg_i64_ptr, arg_i64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2_lane_s64(arg_i64_ptr, arg_i64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 0);
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 1);
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3_lane_s64(arg_i64_ptr, arg_i64x1x3, 0);
+	vld3_lane_s64(arg_i64_ptr, arg_i64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3_lane_s64(arg_i64_ptr, arg_i64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 0);
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 1);
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4_lane_s64(arg_i64_ptr, arg_i64x1x4, 0);
+	vld4_lane_s64(arg_i64_ptr, arg_i64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4_lane_s64(arg_i64_ptr, arg_i64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 0);
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 1);
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vld4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 }
 
 void test_vector_load_u8(uint8x8x2_t arg_u8x8x2, uint8x16x2_t arg_u8x16x2, uint8x8x4_t arg_u8x8x4,
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
index a306200c62f1c0..a7eee3ad25e097 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-accumulate-by-scalar.c
@@ -3,10 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vmla_lane_s32, vmlaq_lane_s32, vmla_laneq_s32, vmlaq_laneq_s32, vmlal_lane_s32
-// vmlal_high_lane_s32, vmlal_laneq_s32, vmlal_high_laneq_s32 are tested under
-// clang/test/CodeGen/arm-neon-range-checks.c
-
 void test_vector_multiply_accumulate_by_scalar_s16(int32x4_t arg_i32x4, int16x8_t arg_i16x8, int16x4_t arg_i16x4) {
 	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
 	vmla_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 3);
@@ -50,6 +46,49 @@ void test_vector_multiply_accumulate_by_scalar_s16(int32x4_t arg_i32x4, int16x8_
 
 }
 
+void test_vector_multiply_accumulate_by_scalar_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2, int64x2_t arg_i64x2) {
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmla_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlaq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlal_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
 void test_vector_multiply_accumulate_by_scalar_u16(uint16x4_t arg_u16x4, uint16x8_t arg_u16x8, uint32x4_t arg_u32x4) {
 	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
 	vmla_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
index 1363a4ec453347..1ed848742e681d 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar-and-widen.c
@@ -3,8 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vmull_lane_s32, vmull_high_lane_s32, vmull_laneq_s32, vmull_high_laneq_s32
-// are tested under clang/test/CodeGen/arm-neon-range-checks.c
 
 void test_vector_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
 	vmull_lane_s16(arg_i16x4, arg_i16x4, 0);
@@ -29,6 +27,28 @@ void test_vector_multiply_by_scalar_and_widen_s16(int16x4_t arg_i16x4, int16x8_t
 
 }
 
+void test_vector_multiply_by_scalar_and_widen_s32(int32x4_t arg_i32x4, int32x2_t arg_i32x2) {
+	vmull_lane_s32(arg_i32x2, arg_i32x2, 0);
+	vmull_lane_s32(arg_i32x2, arg_i32x2, 1);
+	vmull_lane_s32(arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_lane_s32(arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, 0);
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, 1);
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_lane_s32(arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, 0);
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, 3);
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_laneq_s32(arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, 0);
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, 3);
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmull_high_laneq_s32(arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
 
 void test_vector_multiply_by_scalar_and_widen_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4) {
 	vmull_lane_u16(arg_u16x4, arg_u16x4, 0);
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
index 8d762f32b35af5..7c9e73fb12a5a8 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-by-scalar.c
@@ -3,8 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// vmul_lane_f64, vmul_laneq_f64
-// are tested under clang/test/aarch64-neon-ranges.c
 
 void test_vector_multiply_by_scalar_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8) {
 	vmul_lane_s16(arg_i16x4, arg_i16x4, 0);
@@ -131,7 +129,7 @@ void test_vector_multiply_by_scalar_f32(float32_t arg_f32, float32x2_t arg_f32x2
 
 }
 
-void test_vector_multiply_by_scalar_f64(float64x2_t arg_f64x2, float64_t arg_f64, float64x1_t arg_f64x1) {
+void test_vector_multiply_by_scalar_f64(float64x1_t arg_f64x1, float64_t arg_f64, float64x2_t arg_f64x2) {
 	vmul_lane_f64(arg_f64x1, arg_f64x1, 0);
 	vmul_lane_f64(arg_f64x1, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vmul_lane_f64(arg_f64x1, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -144,6 +142,11 @@ void test_vector_multiply_by_scalar_f64(float64x2_t arg_f64x2, float64_t arg_f64
 	vmuld_lane_f64(arg_f64, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vmuld_lane_f64(arg_f64, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, 0);
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, 1);
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmul_laneq_f64(arg_f64x1, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
 	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 0);
 	vmulq_laneq_f64(arg_f64x2, arg_f64x2, 1);
 	vmulq_laneq_f64(arg_f64x2, arg_f64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -155,4 +158,3 @@ void test_vector_multiply_by_scalar_f64(float64x2_t arg_f64x2, float64_t arg_f64
 	vmuld_laneq_f64(arg_f64, arg_f64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
 }
-
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
index 81194427b3b792..c717948b13da9b 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-multiply-subtract-by-scalar.c
@@ -3,9 +3,6 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// clang/test/CodeGen/arm-neon-range-checks.c includes tests for:
-// vmls_lane_s32, vmlsq_lane_s3, vmls_laneq_s32, vmlsq_laneq_s32
-// vmlsl_lane_s32, vmlsl_high_lane_s32, vmlsl_laneq_s32, vmlsl_high_laneq_s32
 
 void test_vector_multiply_subtract_by_scalar_s16(int16x8_t arg_i16x8, int16x4_t arg_i16x4, int32x4_t arg_i32x4) {
 	vmls_lane_s16(arg_i16x4, arg_i16x4, arg_i16x4, 0);
@@ -50,6 +47,49 @@ void test_vector_multiply_subtract_by_scalar_s16(int16x8_t arg_i16x8, int16x4_t
 
 }
 
+void test_vector_multiply_subtract_by_scalar_s32(int64x2_t arg_i64x2, int32x4_t arg_i32x4, int32x2_t arg_i32x2) {
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 0);
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 1);
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_lane_s32(arg_i32x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 0);
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 1);
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_lane_s32(arg_i32x4, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 0);
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 3);
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmls_laneq_s32(arg_i32x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 0);
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 3);
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsq_laneq_s32(arg_i32x4, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 0);
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 1);
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_lane_s32(arg_i64x2, arg_i32x2, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 0);
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 1);
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_lane_s32(arg_i64x2, arg_i32x4, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 0);
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 3);
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_laneq_s32(arg_i64x2, arg_i32x2, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 0);
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 3);
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vmlsl_high_laneq_s32(arg_i64x2, arg_i32x4, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
 void test_vector_multiply_subtract_by_scalar_u16(uint16x8_t arg_u16x8, uint16x4_t arg_u16x4, uint32x4_t arg_u32x4) {
 	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 0);
 	vmls_lane_u16(arg_u16x4, arg_u16x4, arg_u16x4, 3);
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
index 69469fd0d09cdd..a35891c9adbb58 100644
--- a/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/vector-store.c
@@ -3,12 +3,186 @@
 #include <arm_neon.h>
 // REQUIRES: aarch64-registered-target
 
-// s8, s16, s32, and s64 variants are tested under 
-// clang/test/Sema/arm-neon-ranges.c 
 
-void test_store_u8(uint8_t* arg_u8_ptr, uint8x8x2_t arg_u8x8x2, uint8x16_t arg_u8x16,
-				   uint8x16x2_t arg_u8x16x2, uint8x16x4_t arg_u8x16x4, uint8x8_t arg_u8x8,
-				   uint8x16x3_t arg_u8x16x3, uint8x8x4_t arg_u8x8x4, uint8x8x3_t arg_u8x8x3) {
+void test_vector_store_s8(int8x8_t arg_i8x8, int8x8x3_t arg_i8x8x3, int8_t* arg_i8_ptr,
+						  int8x16x3_t arg_i8x16x3,  int8x8x4_t arg_i8x8x4, int8x16x4_t arg_i8x16x4,
+						  int8x8x2_t arg_i8x8x2, int8x16_t arg_i8x16, int8x16x2_t arg_i8x16x2) {
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, 0);
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, 7);
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s8(arg_i8_ptr, arg_i8x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, 0);
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, 15);
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s8(arg_i8_ptr, arg_i8x16, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, 0);
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, 7);
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s8(arg_i8_ptr, arg_i8x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, 0);
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, 7);
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s8(arg_i8_ptr, arg_i8x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, 0);
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, 7);
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s8(arg_i8_ptr, arg_i8x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 0);
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 15);
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s8(arg_i8_ptr, arg_i8x16x2, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 0);
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 15);
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s8(arg_i8_ptr, arg_i8x16x3, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 0);
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 15);
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s8(arg_i8_ptr, arg_i8x16x4, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_s16(int16x8x3_t arg_i16x8x3, int16x4_t arg_i16x4, int16x4x3_t arg_i16x4x3,
+						   int16x8_t arg_i16x8, int16_t* arg_i16_ptr, int16x8x2_t arg_i16x8x2,
+						   int16x8x4_t arg_i16x8x4, int16x4x4_t arg_i16x4x4, int16x4x2_t arg_i16x4x2) {
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, 0);
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, 3);
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s16(arg_i16_ptr, arg_i16x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, 0);
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, 7);
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s16(arg_i16_ptr, arg_i16x8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, 0);
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, 3);
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s16(arg_i16_ptr, arg_i16x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 0);
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 7);
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s16(arg_i16_ptr, arg_i16x8x2, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, 0);
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, 3);
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s16(arg_i16_ptr, arg_i16x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 0);
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 7);
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s16(arg_i16_ptr, arg_i16x8x3, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, 0);
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, 3);
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s16(arg_i16_ptr, arg_i16x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 0);
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 7);
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s16(arg_i16_ptr, arg_i16x8x4, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_s32(int32x4x3_t arg_i32x4x3, int32x4_t arg_i32x4, int32x2x2_t arg_i32x2x2,
+						   int32x2x3_t arg_i32x2x3, int32x4x4_t arg_i32x4x4, int32x4x2_t arg_i32x4x2,
+						   int32x2_t arg_i32x2, int32x2x4_t arg_i32x2x4, int32_t* arg_i32_ptr) {
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, 0);
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, 1);
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s32(arg_i32_ptr, arg_i32x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, 0);
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, 3);
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s32(arg_i32_ptr, arg_i32x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, 0);
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, 1);
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s32(arg_i32_ptr, arg_i32x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 0);
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 3);
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s32(arg_i32_ptr, arg_i32x4x2, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, 0);
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, 1);
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s32(arg_i32_ptr, arg_i32x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 0);
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 3);
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s32(arg_i32_ptr, arg_i32x4x3, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, 0);
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, 1);
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s32(arg_i32_ptr, arg_i32x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 0);
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 3);
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s32(arg_i32_ptr, arg_i32x4x4, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_s64(int64x2x2_t arg_i64x2x2, int64_t* arg_i64_ptr, int64x1_t arg_i64x1,
+						   int64x2x4_t arg_i64x2x4, int64x1x4_t arg_i64x1x4, int64x1x2_t arg_i64x1x2,
+						   int64x1x3_t arg_i64x1x3, int64x2x3_t arg_i64x2x3, int64x2_t arg_i64x2) {
+	vst1_lane_s64(arg_i64_ptr, arg_i64x1, 0);
+	vst1_lane_s64(arg_i64_ptr, arg_i64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1_lane_s64(arg_i64_ptr, arg_i64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, 0);
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, 1);
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst1q_lane_s64(arg_i64_ptr, arg_i64x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2_lane_s64(arg_i64_ptr, arg_i64x1x2, 0);
+	vst2_lane_s64(arg_i64_ptr, arg_i64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2_lane_s64(arg_i64_ptr, arg_i64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 0);
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 1);
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst2q_lane_s64(arg_i64_ptr, arg_i64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3_lane_s64(arg_i64_ptr, arg_i64x1x3, 0);
+	vst3_lane_s64(arg_i64_ptr, arg_i64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3_lane_s64(arg_i64_ptr, arg_i64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 0);
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 1);
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst3q_lane_s64(arg_i64_ptr, arg_i64x2x3, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4_lane_s64(arg_i64_ptr, arg_i64x1x4, 0);
+	vst4_lane_s64(arg_i64_ptr, arg_i64x1x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4_lane_s64(arg_i64_ptr, arg_i64x1x4, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 0);
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 1);
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vst4q_lane_s64(arg_i64_ptr, arg_i64x2x4, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_vector_store_u8(uint8x16_t arg_u8x16, uint8x16x3_t arg_u8x16x3, uint8x8x4_t arg_u8x8x4,
+						  uint8x16x2_t arg_u8x16x2, uint8x8_t arg_u8x8, uint8x8x3_t arg_u8x8x3,
+						  uint8x16x4_t arg_u8x16x4, uint8_t* arg_u8_ptr, uint8x8x2_t arg_u8x8x2) {
 	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 0);
 	vst1_lane_u8(arg_u8_ptr, arg_u8x8, 7);
 	vst1_lane_u8(arg_u8_ptr, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -51,9 +225,9 @@ void test_store_u8(uint8_t* arg_u8_ptr, uint8x8x2_t arg_u8x8x2, uint8x16_t arg_u
 
 }
 
-void test_store_u16(uint16_t* arg_u16_ptr, uint16x8x2_t arg_u16x8x2, uint16x8x3_t arg_u16x8x3,
-					uint16x4x3_t arg_u16x4x3, uint16x4x2_t arg_u16x4x2, uint16x4x4_t arg_u16x4x4,
-					uint16x8x4_t arg_u16x8x4, uint16x4_t arg_u16x4, uint16x8_t arg_u16x8) {
+void test_vector_store_u16(uint16x8x3_t arg_u16x8x3, uint16x4x4_t arg_u16x4x4, uint16_t* arg_u16_ptr,
+						   uint16x4_t arg_u16x4, uint16x4x2_t arg_u16x4x2, uint16x4x3_t arg_u16x4x3,
+						   uint16x8_t arg_u16x8, uint16x8x2_t arg_u16x8x2, uint16x8x4_t arg_u16x8x4) {
 	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 0);
 	vst1_lane_u16(arg_u16_ptr, arg_u16x4, 3);
 	vst1_lane_u16(arg_u16_ptr, arg_u16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -96,9 +270,9 @@ void test_store_u16(uint16_t* arg_u16_ptr, uint16x8x2_t arg_u16x8x2, uint16x8x3_
 
 }
 
-void test_store_u32(uint32x2x3_t arg_u32x2x3, uint32_t* arg_u32_ptr, uint32x4x3_t arg_u32x4x3,
-					uint32x4_t arg_u32x4, uint32x2x4_t arg_u32x2x4, uint32x4x4_t arg_u32x4x4,
-					uint32x2_t arg_u32x2, uint32x2x2_t arg_u32x2x2, uint32x4x2_t arg_u32x4x2) {
+void test_vector_store_u32(uint32x4x3_t arg_u32x4x3, uint32x2_t arg_u32x2, uint32x2x3_t arg_u32x2x3,
+						   uint32x4x4_t arg_u32x4x4, uint32x4_t arg_u32x4, uint32x2x2_t arg_u32x2x2,
+						   uint32_t* arg_u32_ptr, uint32x2x4_t arg_u32x2x4, uint32x4x2_t arg_u32x4x2) {
 	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 0);
 	vst1_lane_u32(arg_u32_ptr, arg_u32x2, 1);
 	vst1_lane_u32(arg_u32_ptr, arg_u32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -141,9 +315,9 @@ void test_store_u32(uint32x2x3_t arg_u32x2x3, uint32_t* arg_u32_ptr, uint32x4x3_
 
 }
 
-void test_store_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2, uint64x2x4_t arg_u64x2x4,
-					uint64x1x4_t arg_u64x1x4, uint64_t* arg_u64_ptr, uint64x1x3_t arg_u64x1x3,
-					uint64x2x2_t arg_u64x2x2, uint64x2x3_t arg_u64x2x3, uint64x1x2_t arg_u64x1x2) {
+void test_vector_store_u64(uint64x2x3_t arg_u64x2x3, uint64x1_t arg_u64x1, uint64x2_t arg_u64x2,
+						   uint64x1x2_t arg_u64x1x2, uint64x2x2_t arg_u64x2x2, uint64x1x3_t arg_u64x1x3,
+						   uint64_t* arg_u64_ptr, uint64x2x4_t arg_u64x2x4, uint64x1x4_t arg_u64x1x4) {
 	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 0);
 	vst1_lane_u64(arg_u64_ptr, arg_u64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vst1_lane_u64(arg_u64_ptr, arg_u64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -182,9 +356,9 @@ void test_store_u64(uint64x1_t arg_u64x1, uint64x2_t arg_u64x2, uint64x2x4_t arg
 
 }
 
-void test_store_p64(poly64_t* arg_p64_ptr, poly64x1x4_t arg_p64x1x4, poly64x2x3_t arg_p64x2x3,
-					poly64x1x2_t arg_p64x1x2, poly64x2x4_t arg_p64x2x4, poly64x2_t arg_p64x2,
-					poly64x1x3_t arg_p64x1x3, poly64x1_t arg_p64x1, poly64x2x2_t arg_p64x2x2) {
+void test_vector_store_p64(poly64x2x4_t arg_p64x2x4, poly64x1x3_t arg_p64x1x3, poly64x1_t arg_p64x1,
+						   poly64x2x2_t arg_p64x2x2, poly64x1x4_t arg_p64x1x4, poly64_t* arg_p64_ptr,
+						   poly64x1x2_t arg_p64x1x2, poly64x2_t arg_p64x2, poly64x2x3_t arg_p64x2x3) {
 	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 0);
 	vst1_lane_p64(arg_p64_ptr, arg_p64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vst1_lane_p64(arg_p64_ptr, arg_p64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -223,9 +397,9 @@ void test_store_p64(poly64_t* arg_p64_ptr, poly64x1x4_t arg_p64x1x4, poly64x2x3_
 
 }
 
-void test_store_f16(float16x4x2_t arg_f16x4x2, float16x8x3_t arg_f16x8x3, float16x8_t arg_f16x8,
-					float16x4_t arg_f16x4, float16x4x3_t arg_f16x4x3, float16x8x4_t arg_f16x8x4,
-					float16x8x2_t arg_f16x8x2, float16_t* arg_f16_ptr, float16x4x4_t arg_f16x4x4) {
+void test_vector_store_f16(float16x4x4_t arg_f16x4x4, float16x8_t arg_f16x8, float16x8x2_t arg_f16x8x2,
+						   float16x8x3_t arg_f16x8x3, float16x4x2_t arg_f16x4x2, float16x4x3_t arg_f16x4x3,
+						   float16x4_t arg_f16x4, float16_t* arg_f16_ptr, float16x8x4_t arg_f16x8x4) {
 	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 0);
 	vst1_lane_f16(arg_f16_ptr, arg_f16x4, 3);
 	vst1_lane_f16(arg_f16_ptr, arg_f16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -268,9 +442,9 @@ void test_store_f16(float16x4x2_t arg_f16x4x2, float16x8x3_t arg_f16x8x3, float1
 
 }
 
-void test_store_f32(float32x2x4_t arg_f32x2x4, float32x4x4_t arg_f32x4x4, float32x4x3_t arg_f32x4x3,
-					float32x4x2_t arg_f32x4x2, float32_t* arg_f32_ptr, float32x4_t arg_f32x4,
-					float32x2_t arg_f32x2, float32x2x2_t arg_f32x2x2, float32x2x3_t arg_f32x2x3) {
+void test_vector_store_f32(float32x2x3_t arg_f32x2x3, float32x2x2_t arg_f32x2x2, float32x4_t arg_f32x4,
+						   float32x4x3_t arg_f32x4x3, float32_t* arg_f32_ptr, float32x4x4_t arg_f32x4x4,
+						   float32x2x4_t arg_f32x2x4, float32x2_t arg_f32x2, float32x4x2_t arg_f32x4x2) {
 	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 0);
 	vst1_lane_f32(arg_f32_ptr, arg_f32x2, 1);
 	vst1_lane_f32(arg_f32_ptr, arg_f32x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -313,9 +487,9 @@ void test_store_f32(float32x2x4_t arg_f32x2x4, float32x4x4_t arg_f32x4x4, float3
 
 }
 
-void test_store_p8(poly8x16_t arg_p8x16, poly8x16x4_t arg_p8x16x4, poly8x8_t arg_p8x8,
-				   poly8x16x2_t arg_p8x16x2, poly8x8x4_t arg_p8x8x4, poly8x16x3_t arg_p8x16x3,
-				   poly8_t* arg_p8_ptr, poly8x8x3_t arg_p8x8x3, poly8x8x2_t arg_p8x8x2) {
+void test_vector_store_p8(poly8x16_t arg_p8x16, poly8x16x2_t arg_p8x16x2, poly8x8x3_t arg_p8x8x3,
+						  poly8x16x3_t arg_p8x16x3, poly8x16x4_t arg_p8x16x4, poly8x8x4_t arg_p8x8x4,
+						  poly8_t* arg_p8_ptr, poly8x8_t arg_p8x8, poly8x8x2_t arg_p8x8x2) {
 	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 0);
 	vst1_lane_p8(arg_p8_ptr, arg_p8x8, 7);
 	vst1_lane_p8(arg_p8_ptr, arg_p8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -358,9 +532,9 @@ void test_store_p8(poly8x16_t arg_p8x16, poly8x16x4_t arg_p8x16x4, poly8x8_t arg
 
 }
 
-void test_store_p16(poly16x4_t arg_p16x4, poly16x4x3_t arg_p16x4x3, poly16x4x2_t arg_p16x4x2,
-					poly16x8x2_t arg_p16x8x2, poly16x8x3_t arg_p16x8x3, poly16_t* arg_p16_ptr,
-					poly16x8x4_t arg_p16x8x4, poly16x8_t arg_p16x8, poly16x4x4_t arg_p16x4x4) {
+void test_vector_store_p16(poly16x4x4_t arg_p16x4x4, poly16x4_t arg_p16x4, poly16x8x2_t arg_p16x8x2,
+						   poly16_t* arg_p16_ptr, poly16x8_t arg_p16x8, poly16x8x3_t arg_p16x8x3,
+						   poly16x4x3_t arg_p16x4x3, poly16x8x4_t arg_p16x8x4, poly16x4x2_t arg_p16x4x2) {
 	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 0);
 	vst1_lane_p16(arg_p16_ptr, arg_p16x4, 3);
 	vst1_lane_p16(arg_p16_ptr, arg_p16x4, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -403,9 +577,9 @@ void test_store_p16(poly16x4_t arg_p16x4, poly16x4x3_t arg_p16x4x3, poly16x4x2_t
 
 }
 
-void test_store_f64(float64x1x2_t arg_f64x1x2, float64x1_t arg_f64x1, float64x2x2_t arg_f64x2x2,
-					float64x1x3_t arg_f64x1x3, float64x2x3_t arg_f64x2x3, float64x2_t arg_f64x2,
-					float64_t* arg_f64_ptr, float64x1x4_t arg_f64x1x4, float64x2x4_t arg_f64x2x4) {
+void test_vector_store_f64(float64_t* arg_f64_ptr, float64x2_t arg_f64x2, float64x1x3_t arg_f64x1x3,
+						   float64x2x4_t arg_f64x2x4, float64x1x4_t arg_f64x1x4, float64x1x2_t arg_f64x1x2,
+						   float64x1_t arg_f64x1, float64x2x2_t arg_f64x2x2, float64x2x3_t arg_f64x2x3) {
 	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 0);
 	vst1_lane_f64(arg_f64_ptr, arg_f64x1, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vst1_lane_f64(arg_f64_ptr, arg_f64x1, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -419,13 +593,11 @@ void test_store_f64(float64x1x2_t arg_f64x1x2, float64x1_t arg_f64x1, float64x2x
 	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vst2_lane_f64(arg_f64_ptr, arg_f64x1x2, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 
-	// Differs from current innacurate ACLE spec -----
 	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 0);
 	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 1);
 	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vst2q_lane_f64(arg_f64_ptr, arg_f64x2x2, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-	// ----------------------------
-	
+
 	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 0);
 	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
 	vst3_lane_f64(arg_f64_ptr, arg_f64x1x3, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
diff --git a/clang/test/Sema/aarch64-neon-ranges.c b/clang/test/Sema/aarch64-neon-ranges.c
deleted file mode 100644
index 2e60a12c26380d..00000000000000
--- a/clang/test/Sema/aarch64-neon-ranges.c
+++ /dev/null
@@ -1,220 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -ffreestanding -fsyntax-only -verify %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-
-void test_vext_8bit(int8x8_t small, int8x16_t big) {
-  vext_s8(small, small, 7);
-  vext_u8(small, small, 7);
-  vext_p8(small, small, 7);
-  vextq_s8(big, big, 15);
-  vextq_u8(big, big, 15);
-  vextq_p8(big, big, 15);
-
-  vext_s8(small, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vext_u8(small, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vext_p8(small, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vextq_s8(big, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vextq_u8(big, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vextq_p8(big, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_mul_lane_f64(float64x1_t small, float64x2_t big, float64x2_t rhs) {
-  vmul_lane_f64(small, small, 0);
-  vmul_laneq_f64(small, big, 1);
-  vmulq_lane_f64(big, small, 0);
-  vmulq_laneq_f64(big, big, 1);
-  vfma_lane_f64(small, small, small, 0);
-  vfma_laneq_f64(small, small, big, 1);
-  vfmaq_lane_f64(big, big, small, 0);
-  vfmaq_laneq_f64(big, big, big, 1);
-
-  vmul_lane_f64(small, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vmul_laneq_f64(small, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vfma_lane_f64(small, small, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vfma_laneq_f64(small, small, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vfmaq_laneq_f64(big, big, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld1st1(int8x8_t small, int8x16_t big, void *addr) {
-  vld1_lane_s8(addr, small, 7);
-  vld1_lane_s16(addr, small, 3);
-  vld1_lane_s32(addr, small, 1);
-  vld1_lane_s64(addr, small, 0);
-
-  vld1q_lane_s8(addr, big, 15);
-  vld1q_lane_s16(addr, big, 7);
-  vld1q_lane_s32(addr, big, 3);
-  vld1q_lane_s64(addr, big, 1);
-
-  vld1_lane_s8(addr, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1_lane_s16(addr, small, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1_lane_s32(addr, small, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1_lane_s64(addr, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld1q_lane_s8(addr, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1q_lane_s16(addr, big, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1q_lane_s32(addr, big, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld1q_lane_s64(addr, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst1_lane_s8(addr, small, 7);
-  vst1_lane_s16(addr, small, 3);
-  vst1_lane_s32(addr, small, 1);
-  vst1_lane_s64(addr, small, 0);
-
-  vst1q_lane_s8(addr, big, 15);
-  vst1q_lane_s16(addr, big, 7);
-  vst1q_lane_s32(addr, big, 3);
-  vst1q_lane_s64(addr, big, 1);
-
-  vst1_lane_s8(addr, small, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1_lane_s16(addr, small, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1_lane_s32(addr, small, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1_lane_s64(addr, small, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst1q_lane_s8(addr, big, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1q_lane_s16(addr, big, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1q_lane_s32(addr, big, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst1q_lane_s64(addr, big, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld2st2(int8x8x2_t small8, int8x16x2_t big8,
-                 int16x4x2_t small16, int16x8x2_t big16,
-                 int32x2x2_t small32, int32x4x2_t big32,
-                 int64x1x2_t small64, int64x2x2_t big64,
-                 void *addr) {
-  vld2_lane_s8(addr, small8, 7);
-  vld2_lane_s16(addr, small16, 3);
-  vld2_lane_s32(addr, small32, 1);
-  vld2_lane_s64(addr, small64, 0);
-
-  vld2q_lane_s8(addr, big8, 15);
-  vld2q_lane_s16(addr, big16, 7);
-  vld2q_lane_s32(addr, big32, 3);
-  vld2q_lane_s64(addr, big64, 1);
-
-  vld2_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld2q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld2q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst2_lane_s8(addr, small8, 7);
-  vst2_lane_s16(addr, small16, 3);
-  vst2_lane_s32(addr, small32, 1);
-  vst2_lane_s64(addr, small64, 0);
-
-  vst2q_lane_s8(addr, big8, 15);
-  vst2q_lane_s16(addr, big16, 7);
-  vst2q_lane_s32(addr, big32, 3);
-  vst2q_lane_s64(addr, big64, 1);
-
-  vst2_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst2q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst2q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld3st3(int8x8x3_t small8, int8x16x3_t big8,
-                 int16x4x3_t small16, int16x8x3_t big16,
-                 int32x2x3_t small32, int32x4x3_t big32,
-                 int64x1x3_t small64, int64x2x3_t big64,
-                 void *addr) {
-  vld3_lane_s8(addr, small8, 7);
-  vld3_lane_s16(addr, small16, 3);
-  vld3_lane_s32(addr, small32, 1);
-  vld3_lane_s64(addr, small64, 0);
-
-  vld3q_lane_s8(addr, big8, 15);
-  vld3q_lane_s16(addr, big16, 7);
-  vld3q_lane_s32(addr, big32, 3);
-  vld3q_lane_s64(addr, big64, 1);
-
-  vld3_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld3q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld3q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst3_lane_s8(addr, small8, 7);
-  vst3_lane_s16(addr, small16, 3);
-  vst3_lane_s32(addr, small32, 1);
-  vst3_lane_s64(addr, small64, 0);
-
-  vst3q_lane_s8(addr, big8, 15);
-  vst3q_lane_s16(addr, big16, 7);
-  vst3q_lane_s32(addr, big32, 3);
-  vst3q_lane_s64(addr, big64, 1);
-
-  vst3_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst3q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst3q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
-void test_ld4st4(int8x8x4_t small8, int8x16x4_t big8,
-                 int16x4x4_t small16, int16x8x4_t big16,
-                 int32x2x4_t small32, int32x4x4_t big32,
-                 int64x1x4_t small64, int64x2x4_t big64,
-                 void *addr) {
-  vld4_lane_s8(addr, small8, 7);
-  vld4_lane_s16(addr, small16, 3);
-  vld4_lane_s32(addr, small32, 1);
-  vld4_lane_s64(addr, small64, 0);
-
-  vld4q_lane_s8(addr, big8, 15);
-  vld4q_lane_s16(addr, big16, 7);
-  vld4q_lane_s32(addr, big32, 3);
-  vld4q_lane_s64(addr, big64, 1);
-
-  vld4_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vld4q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vld4q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst4_lane_s8(addr, small8, 7);
-  vst4_lane_s16(addr, small16, 3);
-  vst4_lane_s32(addr, small32, 1);
-  vst4_lane_s64(addr, small64, 0);
-
-  vst4q_lane_s8(addr, big8, 15);
-  vst4q_lane_s16(addr, big16, 7);
-  vst4q_lane_s32(addr, big32, 3);
-  vst4q_lane_s64(addr, big64, 1);
-
-  vst4_lane_s8(addr, small8, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4_lane_s16(addr, small16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4_lane_s32(addr, small32, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4_lane_s64(addr, small64, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-
-  vst4q_lane_s8(addr, big8, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4q_lane_s16(addr, big16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4q_lane_s32(addr, big32, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  vst4q_lane_s64(addr, big64, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
-}
-
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index d628a88da56ea3..844bde0662ac2a 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -588,7 +588,7 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
-  bool areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+  bool areRangeChecksCompatable(const ArrayRef<ImmCheck> ChecksA,
                                 const ArrayRef<ImmCheck> ChecksB);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
@@ -2180,11 +2180,11 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
 }
 
 inline bool
-NeonEmitter::areCompatableRangeChecks(const ArrayRef<ImmCheck> ChecksA,
+NeonEmitter::areRangeChecksCompatable(const ArrayRef<ImmCheck> ChecksA,
                                       const ArrayRef<ImmCheck> ChecksB) {
   // If multiple intrinsics map to the same builtin, we must ensure that the
-  // intended range checks performed in SemaArm.cpp do not contradict eachother,
-  // as these are emitted once per-buitlin.
+  // intended range checks performed in SemaArm.cpp do not contradict each
+  // other, as these are emitted once per-buitlin.
   //
   // The arguments to be checked and type of each check to be performed must be
   // the same. The element types may differ as they will be resolved
@@ -2216,7 +2216,7 @@ void NeonEmitter::genIntrinsicRangeCheckCode(
 
     const auto it = Emitted.find(Def->getMangledName());
     if (it != Emitted.end()) {
-      assert(areCompatableRangeChecks(Checks, it->second) &&
+      assert(areRangeChecksCompatable(Checks, it->second) &&
              "Neon intrinsics with incompatable immediate range checks cannot "
              "share a builtin.");
       continue; // Ensure this is emitted only once

>From a095c4b16b8d0a3e3e65de9a1b5567e577cfa47b Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Mon, 2 Sep 2024 15:26:04 +0000
Subject: [PATCH 15/17] [Fixup] Improve semantics and remove unnecessary code

Changes
	- clang/lib/Sema/SemaARM.cpp clang/include/clang/Sema/SemaARM.h
		- Rename immediate checking functions from 'Parse' to 'Perform'
		- Change CheckImmediateInSet to test against a set of values, rather than
		  an expression.
		- Use SmallVectorImpl in SemaARM.h.
	- clang/utils/TableGen/NeonEmitter.cpp clang/utils/TableGen/SveEmitter.cpp
		- Simplify the emitted expressions and the unpacking of values
---
 clang/include/clang/Sema/SemaARM.h   | 11 ++++----
 clang/lib/Sema/SemaARM.cpp           | 39 +++++++++++-----------------
 clang/utils/TableGen/NeonEmitter.cpp | 19 +++++++-------
 clang/utils/TableGen/SveEmitter.cpp  |  9 +++----
 4 files changed, 33 insertions(+), 45 deletions(-)

diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index c431b1182ce8bc..bf9a9fb75c2dc1 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -48,12 +48,13 @@ class SemaARM : public SemaBase {
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
+  bool PerformNeonImmChecks(
+      CallExpr *TheCall,
+      SmallVectorImpl<std::tuple<int, int, int, int>> &ImmChecks,
+      int OverloadType);
   bool
-  ParseNeonImmChecks(CallExpr *TheCall,
-                     SmallVector<std::tuple<int, int, int, int>, 2> &ImmChecks,
-                     int OverloadType);
-  bool ParseSVEImmChecks(CallExpr *TheCall,
-                         SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
+  PerformSVEImmChecks(CallExpr *TheCall,
+                      SmallVectorImpl<std::tuple<int, int, int>> &ImmChecks);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 4ab288e2804f41..80a79599844994 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -373,12 +373,9 @@ enum ArmSMEState : unsigned {
 bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
                                 unsigned ArgIdx, unsigned EltBitWidth,
                                 unsigned VecBitWidth) {
-
-  typedef bool (*OptionSetCheckFnTy)(int64_t Value);
-
   // Function that checks whether the operand (ArgIdx) is an immediate
-  // that is one of the predefined values.
-  auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+  // that is one of a given set of values.
+  auto CheckImmediateInSet = [&](std::initializer_list<int64_t> Set,
                                  int ErrDiag) -> bool {
     // We can't check the value of a dependent argument.
     Expr *Arg = TheCall->getArg(ArgIdx);
@@ -390,7 +387,7 @@ bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
     if (SemaRef.BuiltinConstantArg(TheCall, ArgIdx, Imm))
       return true;
 
-    if (!CheckImm(Imm.getSExtValue()))
+    if (std::find(Set.begin(), Set.end(), Imm.getSExtValue()) == Set.end())
       return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
     return false;
   };
@@ -462,14 +459,12 @@ bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
       return true;
     break;
   case ImmCheckType::ImmCheckComplexRot90_270:
-    if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
-                            diag::err_rotation_argument_to_cadd))
+    if (CheckImmediateInSet({90, 270}, diag::err_rotation_argument_to_cadd))
       return true;
     break;
   case ImmCheckType::ImmCheckComplexRotAll90:
-    if (CheckImmediateInSet(
-            [](int64_t V) { return V == 0 || V == 90 || V == 180 || V == 270; },
-            diag::err_rotation_argument_to_cmla))
+    if (CheckImmediateInSet({0, 90, 180, 270},
+                            diag::err_rotation_argument_to_cmla))
       return true;
     break;
   case ImmCheckType::ImmCheck0_1:
@@ -516,16 +511,14 @@ bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
   return false;
 }
 
-bool SemaARM::ParseNeonImmChecks(
+bool SemaARM::PerformNeonImmChecks(
     CallExpr *TheCall,
-    SmallVector<std::tuple<int, int, int, int>, 2> &ImmChecks,
+    SmallVectorImpl<std::tuple<int, int, int, int>> &ImmChecks,
     int OverloadType = -1) {
-  unsigned CheckTy;
-  unsigned ArgIdx, ElementSizeInBits, VecSizeInBits;
   bool HasError = false;
 
   for (const auto &I : ImmChecks) {
-    std::tie(ArgIdx, CheckTy, ElementSizeInBits, VecSizeInBits) = I;
+    auto [ArgIdx, CheckTy, ElementSizeInBits, VecSizeInBits] = I;
 
     if (OverloadType >= 0)
       ElementSizeInBits = NeonTypeFlags(OverloadType).getEltSizeInBits();
@@ -537,14 +530,12 @@ bool SemaARM::ParseNeonImmChecks(
   return HasError;
 }
 
-bool SemaARM::ParseSVEImmChecks(
-    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
-
+bool SemaARM::PerformSVEImmChecks(
+    CallExpr *TheCall, SmallVectorImpl<std::tuple<int, int, int>> &ImmChecks) {
   bool HasError = false;
-  unsigned CheckTy, ArgIdx, ElementSizeInBits;
 
   for (const auto &I : ImmChecks) {
-    std::tie(ArgIdx, CheckTy, ElementSizeInBits) = I;
+    auto [ArgIdx, CheckTy, ElementSizeInBits] = I;
     HasError |=
         CheckImmediateArg(TheCall, CheckTy, ArgIdx, ElementSizeInBits, 128);
   }
@@ -701,7 +692,7 @@ bool SemaARM::CheckSMEBuiltinFunctionCall(unsigned BuiltinID,
 #undef GET_SME_IMMEDIATE_CHECK
   }
 
-  return ParseSVEImmChecks(TheCall, ImmChecks);
+  return PerformSVEImmChecks(TheCall, ImmChecks);
 }
 
 bool SemaARM::CheckSVEBuiltinFunctionCall(unsigned BuiltinID,
@@ -729,7 +720,7 @@ bool SemaARM::CheckSVEBuiltinFunctionCall(unsigned BuiltinID,
 #undef GET_SVE_IMMEDIATE_CHECK
   }
 
-  return ParseSVEImmChecks(TheCall, ImmChecks);
+  return PerformSVEImmChecks(TheCall, ImmChecks);
 }
 
 bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
@@ -819,7 +810,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 #undef GET_NEON_IMMEDIATE_CHECK
   }
 
-  return ParseNeonImmChecks(TheCall, ImmChecks, TV);
+  return PerformNeonImmChecks(TheCall, ImmChecks, TV);
 }
 
 bool SemaARM::CheckMVEBuiltinFunctionCall(unsigned BuiltinID,
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 844bde0662ac2a..bcde879e75e4c6 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -432,8 +432,7 @@ class Intrinsic {
         VecSizeInBits = ContextType.getSizeInBits();
       }
 
-      ImmChecks.emplace_back(
-          ImmCheck(ArgIdx, Kind, EltSizeInBits, VecSizeInBits));
+      ImmChecks.emplace_back(ArgIdx, Kind, EltSizeInBits, VecSizeInBits);
     }
     llvm::sort(ImmChecks.begin(), ImmChecks.end(),
                [](const ImmCheck &a, const ImmCheck &b) {
@@ -588,7 +587,7 @@ class NeonEmitter {
                                      SmallVectorImpl<Intrinsic *> &Defs);
   void genOverloadTypeCheckCode(raw_ostream &OS,
                                 SmallVectorImpl<Intrinsic *> &Defs);
-  bool areRangeChecksCompatable(const ArrayRef<ImmCheck> ChecksA,
+  bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
                                 const ArrayRef<ImmCheck> ChecksB);
   void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                   SmallVectorImpl<Intrinsic *> &Defs);
@@ -2180,7 +2179,7 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
 }
 
 inline bool
-NeonEmitter::areRangeChecksCompatable(const ArrayRef<ImmCheck> ChecksA,
+NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
                                       const ArrayRef<ImmCheck> ChecksB) {
   // If multiple intrinsics map to the same builtin, we must ensure that the
   // intended range checks performed in SemaArm.cpp do not contradict each
@@ -2192,7 +2191,7 @@ NeonEmitter::areRangeChecksCompatable(const ArrayRef<ImmCheck> ChecksA,
   // are not and so must be the same.
   bool compat =
       std::equal(ChecksA.begin(), ChecksA.end(), ChecksB.begin(), ChecksB.end(),
-                 [](const auto A, const auto B) {
+                 [](const auto &A, const auto &B) {
                    return A.getImmArgIdx() == B.getImmArgIdx() &&
                           A.getKind() == B.getKind() &&
                           A.getVecSizeInBits() == B.getVecSizeInBits();
@@ -2216,8 +2215,8 @@ void NeonEmitter::genIntrinsicRangeCheckCode(
 
     const auto it = Emitted.find(Def->getMangledName());
     if (it != Emitted.end()) {
-      assert(areRangeChecksCompatable(Checks, it->second) &&
-             "Neon intrinsics with incompatable immediate range checks cannot "
+      assert(areRangeChecksCompatible(Checks, it->second) &&
+             "Neon intrinsics with incompatible immediate range checks cannot "
              "share a builtin.");
       continue; // Ensure this is emitted only once
     }
@@ -2225,9 +2224,9 @@ void NeonEmitter::genIntrinsicRangeCheckCode(
     // Emit builtin's range checks
     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
     for (const auto &Check : Checks) {
-      OS << " ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
-         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
-         << ", " << Check.getVecSizeInBits() << "));\n"
+      OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
+         << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
+         << Check.getVecSizeInBits() << ");\n"
          << " break;\n";
     }
     Emitted[Def->getMangledName()] = Checks;
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index bb7de00a7a1ba9..b2e2db1a409904 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1203,9 +1203,7 @@ void SVEEmitter::createIntrinsic(
              "ImmArgIdx and Kind must be nonnegative");
 
       unsigned ElementSizeInBits = 0;
-      char Mod;
-      unsigned NumVectors;
-      std::tie(Mod, NumVectors) = getProtoModifier(Proto, EltSizeArgIdx + 1);
+      auto [Mod, NumVectors] = getProtoModifier(Proto, EltSizeArgIdx + 1);
       if (EltSizeArgIdx >= 0)
         ElementSizeInBits = SVEType(TS, Mod, NumVectors).getElementSizeInBits();
       ImmChecks.push_back(ImmCheck(ArgIdx, Kind, ElementSizeInBits));
@@ -1528,9 +1526,8 @@ void SVEEmitter::createRangeChecks(raw_ostream &OS) {
 
     OS << "case SVE::BI__builtin_sve_" << Def->getMangledName() << ":\n";
     for (auto &Check : Def->getImmChecks())
-      OS << "ImmChecks.push_back(std::make_tuple(" << Check.getImmArgIdx()
-         << ", " << Check.getKind() << ", " << Check.getElementSizeInBits()
-         << "));\n";
+      OS << "ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
+         << Check.getKind() << ", " << Check.getElementSizeInBits() << ");\n";
     OS << "  break;\n";
 
     Emitted.insert(Def->getMangledName());

>From f37248cfd1fb7baaa002ba3a6063a4d01105397e Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 5 Sep 2024 18:27:42 +0000
Subject: [PATCH 16/17] Rebase and provide ImmChecks for NEON LUTI intrinsics
 - Add immediate range checking to the new NEON LUTI intrinsics
 (https://github.com/ARM-software/acle/commit/e938350b3034512912eb2655402324750cb8c1ef)
 - Add tests for this under
 clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
 - Update CodeGen tests for NEON LUTI intrinsics to use legal indices
 - Use std::string::contains instead of ends_with to detect laneq in NEON
 operation DAG call generation code (this has no effect on these intrinsics,
 or any existing intrinsics, as they do not have operation DAGs, but the
 '_x2' naming convention prompted this action).

---
 clang/include/clang/Basic/arm_neon.td         |  42 ++-
 clang/test/CodeGen/aarch64-neon-luti.c        |  40 +--
 .../Sema/aarch64-neon-immediate-ranges/luti.c | 283 ++++++++++++++++++
 clang/utils/TableGen/NeonEmitter.cpp          |   2 +-
 4 files changed, 331 insertions(+), 36 deletions(-)
 create mode 100644 clang/test/Sema/aarch64-neon-immediate-ranges/luti.c

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 3760113fcbe494..263d9020d4d00c 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2093,19 +2093,31 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r
 
 // Lookup table read with 2-bit/4-bit indices
 let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
-  def VLUTI2_B    : SInst<"vluti2_lane",    "Q.(qU)I",   "cUcPcQcQUcQPc">;
-  def VLUTI2_B_Q  : SInst<"vluti2_laneq",   "Q.(QU)I",   "cUcPcQcQUcQPc">;
-  def VLUTI2_H    : SInst<"vluti2_lane",    "Q.(<qU)I",   "sUsPshQsQUsQPsQh">;
-  def VLUTI2_H_Q  : SInst<"vluti2_laneq",   "Q.(<QU)I",   "sUsPshQsQUsQPsQh">;
-  def VLUTI4_B    : SInst<"vluti4_lane",    "..(qU)I",   "QcQUcQPc">;
-  def VLUTI4_B_Q    : SInst<"vluti4_laneq",    "..UI",   "QcQUcQPc">;
-  def VLUTI4_H_X2 : SInst<"vluti4_lane_x2", ".2(<qU)I", "QsQUsQPsQh">;
-  def VLUTI4_H_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "QsQUsQPsQh">;
-    
-  let ArchGuard = "defined(__aarch64__)", TargetGuard= "lut,bf16" in {
-    def VLUTI2_BF      : SInst<"vluti2_lane",     "Q.(<qU)I",   "bQb">;
-    def VLUTI2_BF_Q    : SInst<"vluti2_laneq",    "Q.(<QU)I",   "bQb">;
-    def VLUTI4_BF_X2   : SInst<"vluti4_lane_x2", ".2(<qU)I", "Qb">;
-    def VLUTI4_BF_X2_Q   : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb">;
+  def VLUTI2_B    : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcQcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_1>]>;
+  def VLUTI2_B_Q  : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcQcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_3>]>;
+  def VLUTI2_H    : SInst<"vluti2_lane", "Q.(<qU)I", "sUsPshQsQUsQPsQh",
+                         [ImmCheck<2, ImmCheck0_3>]>;
+  def VLUTI2_H_Q  : SInst<"vluti2_laneq", "Q.(<QU)I", "sUsPshQsQUsQPsQh",
+                         [ImmCheck<2, ImmCheck0_7>]>;
+  def VLUTI4_B    : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_0>]>;
+  def VLUTI4_B_Q  : SInst<"vluti4_laneq", "..UI", "QcQUcQPc",
+                         [ImmCheck<2, ImmCheck0_1>]>;
+  def VLUTI4_H_X2 : SInst<"vluti4_lane_x2", ".2(<qU)I", "QsQUsQPsQh",
+                          [ImmCheck<3, ImmCheck0_1>]>;
+  def VLUTI4_H_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "QsQUsQPsQh",
+                          [ImmCheck<3, ImmCheck0_3>]>;
+
+  let TargetGuard = "lut,bf16" in {
+    def VLUTI2_BF      : SInst<"vluti2_lane", "Q.(<qU)I", "bQb",
+                              [ImmCheck<2, ImmCheck0_3>]>;
+    def VLUTI2_BF_Q    : SInst<"vluti2_laneq", "Q.(<QU)I", "bQb",
+                              [ImmCheck<2, ImmCheck0_7>]>;
+    def VLUTI4_BF_X2   : SInst<"vluti4_lane_x2", ".2(<qU)I", "Qb",
+                              [ImmCheck<3, ImmCheck0_1>]>;
+    def VLUTI4_BF_X2_Q   : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb",
+                              [ImmCheck<3, ImmCheck0_3>]>;
   }
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-neon-luti.c b/clang/test/CodeGen/aarch64-neon-luti.c
index 40daf742eb966b..4b485636d45b16 100644
--- a/clang/test/CodeGen/aarch64-neon-luti.c
+++ b/clang/test/CodeGen/aarch64-neon-luti.c
@@ -27,21 +27,21 @@ uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) {
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
 //
 uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
-  return vluti2q_lane_u8(vn, vm, 3);
+  return vluti2q_lane_u8(vn, vm, 1);
 }
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
 //
 uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
-  return vluti2q_laneq_u8(vn, vm, 7);
+  return vluti2q_laneq_u8(vn, vm, 3);
 }
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8(
@@ -67,21 +67,21 @@ int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) {
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
 //
 int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) {
-  return vluti2q_lane_s8(vn, vm, 3);
+  return vluti2q_lane_s8(vn, vm, 1);
 }
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
 //
 int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
-  return vluti2q_laneq_s8(vn, vm, 7);
+  return vluti2q_laneq_s8(vn, vm, 3);
 }
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8(
@@ -107,21 +107,21 @@ poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
 //
 poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
-  return vluti2q_lane_p8(vn, vm, 3);
+  return vluti2q_lane_p8(vn, vm, 1);
 }
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
 //
 poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
-  return vluti2q_laneq_p8(vn, vm, 7);
+  return vluti2q_laneq_p8(vn, vm, 3);
 }
 
 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16(
@@ -349,11 +349,11 @@ uint8x16_t test_vluti4q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_s8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
 //
 int8x16_t test_vluti4q_lane_s8(int8x16_t vn, uint8x8_t vm) {
-  return vluti4q_lane_s8(vn, vm, 1);
+  return vluti4q_lane_s8(vn, vm, 0);
 }
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_s8(
@@ -369,11 +369,11 @@ int8x16_t test_vluti4q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_p8(
 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
+// CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
 //
 poly8x16_t test_vluti4q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
-  return vluti4q_lane_p8(vn, vm, 1);
+  return vluti4q_lane_p8(vn, vm, 0);
 }
 
 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_p8(
@@ -415,11 +415,11 @@ uint16x8_t test_vluti4q_laneq_u16_x2(uint16x8x2_t vn, uint8x16_t vm) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
-// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
 // CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANE_X24]]
 //
 int16x8_t test_vluti4q_lane_s16_x2(int16x8x2_t vn, uint8x8_t vm) {
-  return vluti4q_lane_s16_x2(vn, vm, 3);
+  return vluti4q_lane_s16_x2(vn, vm, 1);
 }
 
 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_s16_x2(
@@ -463,11 +463,11 @@ float16x8_t test_vluti4q_laneq_f16_x2(float16x8x2_t vn, uint8x16_t vm) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
-// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 2)
+// CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
 // CHECK-NEXT:    ret <8 x bfloat> [[VLUTI4Q_LANE_X24]]
 //
 bfloat16x8_t test_vluti4q_lane_bf16_x2(bfloat16x8x2_t vn, uint8x8_t vm) {
-  return vluti4q_lane_bf16_x2(vn, vm, 2);
+  return vluti4q_lane_bf16_x2(vn, vm, 1);
 }
 
 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_laneq_bf16_x2(
diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c b/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
new file mode 100644
index 00000000000000..bed8cbc1481dd8
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c
@@ -0,0 +1,283 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -ffreestanding -fsyntax-only -verify %s
+
+#include <arm_neon.h>
+// REQUIRES: aarch64-registered-target
+
+// 2-bit indices
+
+void test_lookup_read_2bit_u8(uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, 0);
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, 1);
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_u8(arg_u8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, 0);
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, 3);
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_u8(arg_u8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, 0);
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, 1);
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_u8(arg_u8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, 0);
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, 3);
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_u8(arg_u8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_s8(int8x8_t arg_i8x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8, int8x16_t arg_i8x16) {
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, 0);
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, 1);
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_s8(arg_i8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, 0);
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, 3);
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_s8(arg_i8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, 0);
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, 1);
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_s8(arg_i8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, 0);
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, 3);
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_s8(arg_i8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_p8(poly8x16_t arg_p8x16, poly8x8_t arg_p8x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, 0);
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, 1);
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_p8(arg_p8x8, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, 0);
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, 3);
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_p8(arg_p8x8, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, 0);
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, 1);
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_p8(arg_p8x16, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, 0);
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, 3);
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_p8(arg_p8x16, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_u16(uint16x4_t arg_u16x4, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8, uint16x8_t arg_u16x8) {
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, 0);
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, 3);
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_u16(arg_u16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, 0);
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, 7);
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_u16(arg_u16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, 0);
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, 3);
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_u16(arg_u16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, 0);
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, 7);
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_u16(arg_u16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_s16(int16x4_t arg_i16x4, int16x8_t arg_i16x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, 0);
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, 3);
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_s16(arg_i16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, 0);
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, 7);
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_s16(arg_i16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, 0);
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, 3);
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_s16(arg_i16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, 0);
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, 7);
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_s16(arg_i16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_f16(float16x8_t arg_f16x8, uint8x16_t arg_u8x16, float16x4_t arg_f16x4, uint8x8_t arg_u8x8) {
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, 0);
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, 3);
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_f16(arg_f16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, 0);
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, 7);
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_f16(arg_f16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, 0);
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, 3);
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_f16(arg_f16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, 0);
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, 7);
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_f16(arg_f16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_bf16(bfloat16x4_t arg_b16x4, bfloat16x8_t arg_b16x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, 0);
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, 3);
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_bf16(arg_b16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, 0);
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, 7);
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_bf16(arg_b16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, 0);
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, 3);
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_bf16(arg_b16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, 0);
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, 7);
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_bf16(arg_b16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_2bit_p16(poly16x4_t arg_p16x4, poly16x8_t arg_p16x8, uint8x16_t arg_u8x16, uint8x8_t arg_u8x8) {
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, 0);
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, 3);
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_lane_p16(arg_p16x4, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, 0);
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, 7);
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2_laneq_p16(arg_p16x4, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, 0);
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, 3);
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_lane_p16(arg_p16x8, arg_u8x8, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, 0);
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, 7);
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti2q_laneq_p16(arg_p16x8, arg_u8x16, 8); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+// 4-bit indices 
+
+void test_lookup_read_4bit_u8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vluti4q_lane_u8(arg_u8x16, arg_u8x8, 0);
+	vluti4q_lane_u8(arg_u8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_u8(arg_u8x16, arg_u8x8, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, 0);
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, 1);
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_u8(arg_u8x16, arg_u8x16, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_4bit_s8(int8x16_t arg_i8x16, uint8x8_t arg_u8x8, uint8x16_t arg_u8x16) {
+	vluti4q_lane_s8(arg_i8x16, arg_u8x8, 0);
+	vluti4q_lane_s8(arg_i8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_s8(arg_i8x16, arg_u8x8, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, 0);
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, 1);
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_s8(arg_i8x16, arg_u8x16, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_4bit_p8(uint8x8_t arg_u8x8, uint8x16_t arg_u8x16, poly8x16_t arg_p8x16) {
+	vluti4q_lane_p8(arg_p8x16, arg_u8x8, 0);
+	vluti4q_lane_p8(arg_p8x16, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_p8(arg_p8x16, arg_u8x8, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, 0);
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, 1);
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_p8(arg_p8x16, arg_u8x16, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+void test_lookup_read_4bit_x2(int16x8x2_t arg_i16x8x2, uint8x8_t arg_u8x8, float16x8x2_t arg_f16x8x2, uint8x16_t arg_u8x16, poly16x8x2_t arg_p16x8x2, uint16x8x2_t arg_u16x8x2, bfloat16x8x2_t arg_b16x8x2) {
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, 0);
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, 1);
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_u16_x2(arg_u16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_u16_x2(arg_u16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, 0);
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, 1);
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_s16_x2(arg_i16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_s16_x2(arg_i16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, 0);
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, 1);
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_f16_x2(arg_f16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_f16_x2(arg_f16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, 0);
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, 1);
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_bf16_x2(arg_b16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_bf16_x2(arg_b16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, 0);
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, 1);
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_lane_p16_x2(arg_p16x8x2, arg_u8x8, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, 0);
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, 3);
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+	vluti4q_laneq_p16_x2(arg_p16x8x2, arg_u8x16, 4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+
+}
+
+
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index bcde879e75e4c6..e0d7b0db7f5780 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1505,7 +1505,7 @@ Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
     N = emitDagArg(DI->getArg(0), "").second;
   std::optional<std::string> MangledName;
   if (MatchMangledName) {
-    if (Intr.getRecord()->getValueAsString("Name").ends_with("laneq"))
+    if (Intr.getRecord()->getValueAsString("Name").contains("laneq"))
       N += "q";
     MangledName = Intr.mangleName(N, ClassS);
   }

>From 0c566bc2f52d1271bd7e40a3408fb524faebc035 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Fri, 6 Sep 2024 10:13:31 +0000
Subject: [PATCH 17/17] [NFC] Remove trailing whitespace and move default
 overload value

---
 clang/include/clang/Basic/arm_neon.td | 68 +++++++++++++--------------
 clang/include/clang/Sema/SemaARM.h    |  2 +-
 clang/lib/Sema/SemaARM.cpp            |  2 +-
 3 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 263d9020d4d00c..875ec6e90b685b 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -424,17 +424,17 @@ def VQSHLU_N   : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl",
 // is that of the return type (half as wide as the argument in this case), using
 // ImmCheckShiftRightNarrow would return in an upper bound of (sizeinbits(arg)/2)/2.
 // ImmCheckShiftRight produces the correct behavior here.
-def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl", 
+def VSHRN_N    : IInst<"vshrn_n", "<QI", "silUsUiUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil", 
+def VQSHRUN_N  : SInst<"vqshrun_n", "(<U)QI", "sil",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil", 
+def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl", 
+def VQSHRN_N   : SInst<"vqshrn_n", "<QI", "silUsUiUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl", 
+def VRSHRN_N   : IInst<"vrshrn_n", "<QI", "silUsUiUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
-def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl", 
+def VQRSHRN_N  : SInst<"vqrshrn_n", "<QI", "silUsUiUl",
                       [ImmCheck<1, ImmCheckShiftRight>]>;
 
 // Widening left-shifts should have a range of 0..(sizeinbits(arg)-1).
@@ -466,7 +466,7 @@ def VLD1_X3   : WInst<"vld1_x3", "3(c*!)",
 def VLD1_X4   : WInst<"vld1_x4", "4(c*!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs", 
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
                       [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP  : WInst<"vld1_dup", ".(c*!)",
                       "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
@@ -479,7 +479,7 @@ def VST1_X3   : WInst<"vst1_x3", "v*(3!)",
 def VST1_X4   : WInst<"vst1_x4", "v*(4!)",
                       "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
 def VST1_LANE : WInst<"vst1_lane", "v*(.!)I",
-                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs", 
+                      "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
                       [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 
 let ArchGuard = "(__ARM_FP & 2)" in {
@@ -487,14 +487,14 @@ def VLD1_F16      : WInst<"vld1", ".(c*!)", "hQh">;
 def VLD1_X2_F16   : WInst<"vld1_x2", "2(c*!)", "hQh">;
 def VLD1_X3_F16   : WInst<"vld1_x3", "3(c*!)", "hQh">;
 def VLD1_X4_F16   : WInst<"vld1_x4", "4(c*!)", "hQh">;
-def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh", 
+def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def VLD1_DUP_F16  : WInst<"vld1_dup", ".(c*!)", "hQh">;
 def VST1_F16      : WInst<"vst1", "v*(.!)", "hQh">;
 def VST1_X2_F16   : WInst<"vst1_x2", "v*(2!)", "hQh">;
 def VST1_X3_F16   : WInst<"vst1_x3", "v*(3!)", "hQh">;
 def VST1_X4_F16   : WInst<"vst1_x4", "v*(4!)", "hQh">;
-def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh", 
+def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh",
                           [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
@@ -509,20 +509,20 @@ def VLD3_DUP  : WInst<"vld3_dup", "3(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
 def VLD4_DUP  : WInst<"vld4_dup", "4(c*!)",
                       "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
-def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
                       [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
                       [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
-def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
                       [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
 def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
-def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
                       [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
                       [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs", 
+def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
                       [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 let ArchGuard = "(__ARM_FP & 2)" in {
 def VLD2_F16      : WInst<"vld2", "2(c*!)", "hQh">;
@@ -531,20 +531,20 @@ def VLD4_F16      : WInst<"vld4", "4(c*!)", "hQh">;
 def VLD2_DUP_F16  : WInst<"vld2_dup", "2(c*!)", "hQh">;
 def VLD3_DUP_F16  : WInst<"vld3_dup", "3(c*!)", "hQh">;
 def VLD4_DUP_F16  : WInst<"vld4_dup", "4(c*!)", "hQh">;
-def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh", 
+def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh",
                           [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh", 
+def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh",
                           [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
-def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh", 
+def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh",
                           [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
 def VST2_F16      : WInst<"vst2", "v*(2!)", "hQh">;
 def VST3_F16      : WInst<"vst3", "v*(3!)", "hQh">;
 def VST4_F16      : WInst<"vst4", "v*(4!)", "hQh">;
-def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh", 
+def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh",
                           [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
-def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh", 
+def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh",
                          [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh", 
+def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh",
                           [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
 }
 
@@ -613,7 +613,7 @@ def VCVT_N_S32   : SInst<"vcvt_n_s32", "S.I", "fQf",
                         [ImmCheck<1, ImmCheck1_32>]>;
 def VCVT_N_U32   : SInst<"vcvt_n_u32", "U.I", "fQf",
                         [ImmCheck<1, ImmCheck1_32>]>;
-def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi", 
+def VCVT_N_F32   : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi",
                         [ImmCheck<1, ImmCheck1_32>]>;
 
 def VMOVN        : IInst<"vmovn", "<Q",  "silUsUiUl">;
@@ -661,9 +661,9 @@ def VQDMULH_LANE  : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
 }
 let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi", 
+def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi",
                               [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi", 
+def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi",
                               [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 }
 
@@ -985,7 +985,7 @@ def QRSHRN_HIGH_N  : SOpInst<"vqrshrn_high_n", "<(<q).I",
 // Converting vectors
 def VMOVL_HIGH   : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
 
-def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl", 
+def CVTF_N_F64   : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl",
                         [ImmCheck<1, ImmCheck1_64>]>;
 def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd",
                         [ImmCheck<1, ImmCheck1_64>]>;
@@ -1065,9 +1065,9 @@ def VMLA_LANEQ   : IOpInst<"vmla_laneq", "...QI",
                            "siUsUifQsQiQUsQUiQf", OP_MLA_LN>;
 def VMLS_LANEQ   : IOpInst<"vmls_laneq", "...QI",
                            "siUsUifQsQiQUsQUiQf", OP_MLS_LN>;
-def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd", 
+def VFMA_LANE    : IInst<"vfma_lane", "...qI", "fdQfQd",
                         [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd", 
+def VFMA_LANEQ   : IInst<"vfma_laneq", "...QI", "fdQfQd",
                         [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def VFMS_LANE    : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
 def VFMS_LANEQ   : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>;
@@ -1136,7 +1136,7 @@ def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Newly added Vector Extract for f64
-def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl", 
+def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl",
                     [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1732,11 +1732,11 @@ let TargetGuard = "fullfp16,neon" in {
     def VCLTH      : SOpInst<"vclt", "U..", "hQh", OP_LT>;
 
   // Vector conversion
-    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs", 
+    def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs",
                           [ImmCheck<1, ImmCheck1_16>]>;
-    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh", 
+    def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh",
                           [ImmCheck<1, ImmCheck1_16>]>;
-    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh", 
+    def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh",
                           [ImmCheck<1, ImmCheck1_16>]>;
 
   // Max/Min
@@ -1867,9 +1867,9 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
   def VZIP2H     : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>;
   def VUZP2H     : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>;
 
-  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh", 
+  def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "1.I", "Sh",
                                 [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
-  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh", 
+  def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh",
                                 [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
 }
 
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index bf9a9fb75c2dc1..8c4c56e2221301 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -51,7 +51,7 @@ class SemaARM : public SemaBase {
   bool PerformNeonImmChecks(
       CallExpr *TheCall,
       SmallVectorImpl<std::tuple<int, int, int, int>> &ImmChecks,
-      int OverloadType);
+      int OverloadType = -1);
   bool
   PerformSVEImmChecks(CallExpr *TheCall,
                       SmallVectorImpl<std::tuple<int, int, int>> &ImmChecks);
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 80a79599844994..770968cb2d4b46 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -514,7 +514,7 @@ bool SemaARM::CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy,
 bool SemaARM::PerformNeonImmChecks(
     CallExpr *TheCall,
     SmallVectorImpl<std::tuple<int, int, int, int>> &ImmChecks,
-    int OverloadType = -1) {
+    int OverloadType) {
   bool HasError = false;
 
   for (const auto &I : ImmChecks) {