[llvm] 9c6b7f6 - Revert "[ARM][MVE] Add intrinsics for immediate shifts."

Eric Christopher via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 9 16:57:05 PST 2019


Author: Eric Christopher
Date: 2019-12-09T16:47:38-08:00
New Revision: 9c6b7f68b807250e7c3aa01938339fdbd239c4ea

URL: https://github.com/llvm/llvm-project/commit/9c6b7f68b807250e7c3aa01938339fdbd239c4ea
DIFF: https://github.com/llvm/llvm-project/commit/9c6b7f68b807250e7c3aa01938339fdbd239c4ea.diff

LOG: Revert "[ARM][MVE] Add intrinsics for immediate shifts."
and two follow-on commits: one warning fix and one functionality fix.

As it's breaking at least the lto bot:

http://lab.llvm.org:8011/builders/clang-with-lto-ubuntu/builds/15132/steps/test-stage1-compiler/logs/stdio

This reverts commits:

 8d70f3c933a5b81a87a5ab1af0e3e98ee2cd7c67
 ff4dceef9201c5ae3924e92f6955977f243ac71d
 d97b3e3e65cd77a81b39732af84a1a4229e95091

Added: 
    

Modified: 
    clang/include/clang/Basic/arm_mve.td
    clang/include/clang/Basic/arm_mve_defs.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/utils/TableGen/MveEmitter.cpp
    llvm/include/llvm/IR/IntrinsicsARM.td
    llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 
    clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
    llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll


################################################################################
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 426d3b5a2f44..f3d3f4124101 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -609,33 +609,6 @@ defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
 defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
 defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;
 
-multiclass PredicatedImmediateVectorShift<
-    Immediate immtype, string predIntrName, list<dag> unsignedFlag = []> {
-  foreach predIntr = [IRInt<predIntrName, [Vector, Predicate]>] in {
-    def _m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v,
-                                      immtype:$sh, Predicate:$pred),
-       !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
-            (predIntr $pred, $inactive))>;
-    def _x_n: Intrinsic<Vector, (args Vector:$v, immtype:$sh,
-                                      Predicate:$pred),
-       !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
-            (predIntr $pred, (undef Vector)))>;
-  }
-}
-
-let params = T.Int in {
-  def vshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh),
-                         (shl $v, (splat (Scalar $sh)))>;
-  defm vshlq: PredicatedImmediateVectorShift<imm_0toNm1, "shl_imm_predicated">;
-
-  let pnt = PNT_NType in {
-    def vshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh),
-                            (immshr $v, $sh, (unsignedflag Scalar))>;
-    defm vshrq: PredicatedImmediateVectorShift<imm_1toN, "shr_imm_predicated",
-                                     [(unsignedflag Scalar)]>;
-  }
-}
-
 // Base class for the scalar shift intrinsics.
 class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
   Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {

diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 6bc9b35f0fc4..1d72cc45796c 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -66,10 +66,6 @@ def xor: IRBuilder<"CreateXor">;
 def sub: IRBuilder<"CreateSub">;
 def shl: IRBuilder<"CreateShl">;
 def lshr: IRBuilder<"CreateLShr">;
-def immshr: CGHelperFn<"MVEImmediateShr"> {
-  let special_params = [IRBuilderIntParam<1, "unsigned">,
-                        IRBuilderIntParam<2, "bool">];
-}
 def fadd: IRBuilder<"CreateFAdd">;
 def fmul: IRBuilder<"CreateFMul">;
 def fsub: IRBuilder<"CreateFSub">;
@@ -322,8 +318,8 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
 //
 // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
 // inclusive.
-def imm_1toN : Immediate<sint, IB_EltBit<1>>;
-def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>;
+def imm_1toN : Immediate<u32, IB_EltBit<1>>;
+def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
 
 // imm_lane has to be the index of a vector lane in the main vector type, i.e
 // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 890019ac51c2..7447a5841599 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6802,14 +6802,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   }
 }
 
-template<typename Integer>
-static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
-  llvm::APSInt IntVal;
-  assert(E->isIntegerConstantExpr(IntVal, Context) &&
-         "Sema should have checked this was a constant");
-  return IntVal.getExtValue();
-}
-
 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
                                      llvm::Type *T, bool Unsigned) {
   // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
@@ -6817,27 +6809,6 @@ static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
   return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
 }
 
-static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
-                                    uint32_t Shift, bool Unsigned) {
-  // MVE helper function for integer shift right. This must handle signed vs
-  // unsigned, and also deal specially with the case where the shift count is
-  // equal to the lane size. In LLVM IR, an LShr with that parameter would be
-  // undefined behavior, but in MVE it's legal, so we must convert it to code
-  // that is not undefined in IR.
-  unsigned LaneBits =
-      V->getType()->getVectorElementType()->getPrimitiveSizeInBits();
-  if (Shift == LaneBits) {
-    // An unsigned shift of the full lane size always generates zero, so we can
-    // simply emit a zero vector. A signed shift of the full lane size does the
-    // same thing as shifting by one bit fewer.
-    if (Unsigned)
-      return llvm::Constant::getNullValue(V->getType());
-    else
-      --Shift;
-  }
-  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
-}
-
 static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
   // MVE-specific helper function for a vector splat, which infers the element
   // count of the output vector by knowing that MVE vectors are all 128 bits

diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
deleted file mode 100644
index 200273c03654..000000000000
--- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
+++ /dev/null
@@ -1,722 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-
-#include <arm_mve.h>
-
-// CHECK-LABEL: @test_vshlq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
-// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
-//
-int8x16_t test_vshlq_n_s8(int8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 5);
-#else /* POLYMORPHIC */
-    return vshlq_n_s8(a, 5);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
-// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
-//
-int16x8_t test_vshlq_n_s16(int16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 5);
-#else /* POLYMORPHIC */
-    return vshlq_n_s16(a, 5);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 18, i32 18, i32 18, i32 18>
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
-//
-int32x4_t test_vshlq_n_s32(int32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 18);
-#else /* POLYMORPHIC */
-    return vshlq_n_s32(a, 18);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_s8_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
-//
-int8x16_t test_vshlq_n_s8_trivial(int8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 0);
-#else /* POLYMORPHIC */
-    return vshlq_n_s8(a, 0);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_s16_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
-//
-int16x8_t test_vshlq_n_s16_trivial(int16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 0);
-#else /* POLYMORPHIC */
-    return vshlq_n_s16(a, 0);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_s32_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
-//
-int32x4_t test_vshlq_n_s32_trivial(int32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 0);
-#else /* POLYMORPHIC */
-    return vshlq_n_s32(a, 0);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
-//
-uint8x16_t test_vshlq_n_u8(uint8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 3);
-#else /* POLYMORPHIC */
-    return vshlq_n_u8(a, 3);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
-// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
-//
-uint16x8_t test_vshlq_n_u16(uint16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 11);
-#else /* POLYMORPHIC */
-    return vshlq_n_u16(a, 11);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
-//
-uint32x4_t test_vshlq_n_u32(uint32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 7);
-#else /* POLYMORPHIC */
-    return vshlq_n_u32(a, 7);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_u8_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
-//
-uint8x16_t test_vshlq_n_u8_trivial(uint8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 0);
-#else /* POLYMORPHIC */
-    return vshlq_n_u8(a, 0);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_u16_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
-//
-uint16x8_t test_vshlq_n_u16_trivial(uint16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 0);
-#else /* POLYMORPHIC */
-    return vshlq_n_u16(a, 0);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_n_u32_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
-//
-uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshlq_n(a, 0);
-#else /* POLYMORPHIC */
-    return vshlq_n_u32(a, 0);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
-// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
-//
-int8x16_t test_vshrq_n_s8(int8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 4);
-#else /* POLYMORPHIC */
-    return vshrq_n_s8(a, 4);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
-// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
-//
-int16x8_t test_vshrq_n_s16(int16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 10);
-#else /* POLYMORPHIC */
-    return vshrq_n_s16(a, 10);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 19, i32 19, i32 19, i32 19>
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
-//
-int32x4_t test_vshrq_n_s32(int32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 19);
-#else /* POLYMORPHIC */
-    return vshrq_n_s32(a, 19);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_s8_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
-//
-int8x16_t test_vshrq_n_s8_trivial(int8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 8);
-#else /* POLYMORPHIC */
-    return vshrq_n_s8(a, 8);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_s16_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
-//
-int16x8_t test_vshrq_n_s16_trivial(int16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 16);
-#else /* POLYMORPHIC */
-    return vshrq_n_s16(a, 16);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_s32_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31>
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
-//
-int32x4_t test_vshrq_n_s32_trivial(int32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 32);
-#else /* POLYMORPHIC */
-    return vshrq_n_s32(a, 32);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
-//
-uint8x16_t test_vshrq_n_u8(uint8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 1);
-#else /* POLYMORPHIC */
-    return vshrq_n_u8(a, 1);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
-// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
-//
-uint16x8_t test_vshrq_n_u16(uint16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 10);
-#else /* POLYMORPHIC */
-    return vshrq_n_u16(a, 10);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], <i32 10, i32 10, i32 10, i32 10>
-// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
-//
-uint32x4_t test_vshrq_n_u32(uint32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 10);
-#else /* POLYMORPHIC */
-    return vshrq_n_u32(a, 10);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_u8_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <16 x i8> zeroinitializer
-//
-uint8x16_t test_vshrq_n_u8_trivial(uint8x16_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 8);
-#else /* POLYMORPHIC */
-    return vshrq_n_u8(a, 8);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_u16_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <8 x i16> zeroinitializer
-//
-uint16x8_t test_vshrq_n_u16_trivial(uint16x8_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 16);
-#else /* POLYMORPHIC */
-    return vshrq_n_u16(a, 16);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_n_u32_trivial(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> zeroinitializer
-//
-uint32x4_t test_vshrq_n_u32_trivial(uint32x4_t a)
-{
-#ifdef POLYMORPHIC
-    return vshrq(a, 32);
-#else /* POLYMORPHIC */
-    return vshrq_n_u32(a, 32);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-int8x16_t test_vshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_m_n(inactive, a, 6, p);
-#else /* POLYMORPHIC */
-    return vshlq_m_n_s8(inactive, a, 6, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-int16x8_t test_vshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_m_n(inactive, a, 13, p);
-#else /* POLYMORPHIC */
-    return vshlq_m_n_s16(inactive, a, 13, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-int32x4_t test_vshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_m_n(inactive, a, 0, p);
-#else /* POLYMORPHIC */
-    return vshlq_m_n_s32(inactive, a, 0, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_m_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-uint8x16_t test_vshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_m_n(inactive, a, 3, p);
-#else /* POLYMORPHIC */
-    return vshlq_m_n_u8(inactive, a, 3, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_m_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-uint16x8_t test_vshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_m_n(inactive, a, 1, p);
-#else /* POLYMORPHIC */
-    return vshlq_m_n_u16(inactive, a, 1, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_m_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-uint32x4_t test_vshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_m_n(inactive, a, 24, p);
-#else /* POLYMORPHIC */
-    return vshlq_m_n_u32(inactive, a, 24, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_m_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-int8x16_t test_vshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_m(inactive, a, 2, p);
-#else /* POLYMORPHIC */
-    return vshrq_m_n_s8(inactive, a, 2, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_m_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-int16x8_t test_vshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_m(inactive, a, 3, p);
-#else /* POLYMORPHIC */
-    return vshrq_m_n_s16(inactive, a, 3, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_m_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-int32x4_t test_vshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_m(inactive, a, 13, p);
-#else /* POLYMORPHIC */
-    return vshrq_m_n_s32(inactive, a, 13, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_m_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-uint8x16_t test_vshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_m(inactive, a, 4, p);
-#else /* POLYMORPHIC */
-    return vshrq_m_n_u8(inactive, a, 4, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_m_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-uint16x8_t test_vshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_m(inactive, a, 14, p);
-#else /* POLYMORPHIC */
-    return vshrq_m_n_u16(inactive, a, 14, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_m_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 21, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-uint32x4_t test_vshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_m(inactive, a, 21, p);
-#else /* POLYMORPHIC */
-    return vshrq_m_n_u32(inactive, a, 21, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_x_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-int8x16_t test_vshlq_x_n_s8(int8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_x_n(a, 1, p);
-#else /* POLYMORPHIC */
-    return vshlq_x_n_s8(a, 1, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_x_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 15, <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-int16x8_t test_vshlq_x_n_s16(int16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_x_n(a, 15, p);
-#else /* POLYMORPHIC */
-    return vshlq_x_n_s16(a, 15, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_x_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, <4 x i1> [[TMP1]], <4 x i32> undef)
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-int32x4_t test_vshlq_x_n_s32(int32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_x_n(a, 13, p);
-#else /* POLYMORPHIC */
-    return vshlq_x_n_s32(a, 13, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_x_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef)
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-uint8x16_t test_vshlq_x_n_u8(uint8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_x_n(a, 4, p);
-#else /* POLYMORPHIC */
-    return vshlq_x_n_u8(a, 4, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_x_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-uint16x8_t test_vshlq_x_n_u16(uint16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_x_n(a, 10, p);
-#else /* POLYMORPHIC */
-    return vshlq_x_n_u16(a, 10, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshlq_x_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 30, <4 x i1> [[TMP1]], <4 x i32> undef)
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-uint32x4_t test_vshlq_x_n_u32(uint32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshlq_x_n(a, 30, p);
-#else /* POLYMORPHIC */
-    return vshlq_x_n_u32(a, 30, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_x_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef)
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-int8x16_t test_vshrq_x_n_s8(int8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_x(a, 4, p);
-#else /* POLYMORPHIC */
-    return vshrq_x_n_s8(a, 4, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_x_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-int16x8_t test_vshrq_x_n_s16(int16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_x(a, 10, p);
-#else /* POLYMORPHIC */
-    return vshrq_x_n_s16(a, 10, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_x_n_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 7, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-int32x4_t test_vshrq_x_n_s32(int32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_x(a, 7, p);
-#else /* POLYMORPHIC */
-    return vshrq_x_n_s32(a, 7, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_x_n_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-//
-uint8x16_t test_vshrq_x_n_u8(uint8x16_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_x(a, 7, p);
-#else /* POLYMORPHIC */
-    return vshrq_x_n_u8(a, 7, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_x_n_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 7, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-//
-uint16x8_t test_vshrq_x_n_u16(uint16x8_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_x(a, 7, p);
-#else /* POLYMORPHIC */
-    return vshrq_x_n_u16(a, 7, p);
-#endif /* POLYMORPHIC */
-}
-
-// CHECK-LABEL: @test_vshrq_x_n_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-//
-uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p)
-{
-#ifdef POLYMORPHIC
-    return vshrq_x(a, 6, p);
-#else /* POLYMORPHIC */
-    return vshrq_x_n_u32(a, 6, p);
-#endif /* POLYMORPHIC */
-}

diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index 35a5e52bf4a1..81c08a2baa71 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -470,10 +470,6 @@ class Result {
   virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0;
   virtual bool hasIntegerConstantValue() const { return false; }
   virtual uint32_t integerConstantValue() const { return 0; }
-  virtual bool hasIntegerValue() const { return false; }
-  virtual std::string getIntegerValue(const std::string &) {
-    llvm_unreachable("non-working Result::getIntegerValue called");
-  }
   virtual std::string typeName() const { return "Value *"; }
 
   // Mostly, when a code-generation operation has a dependency on prior
@@ -548,9 +544,8 @@ class BuiltinArgResult : public Result {
 public:
   unsigned ArgNum;
   bool AddressType;
-  bool Immediate;
-  BuiltinArgResult(unsigned ArgNum, bool AddressType, bool Immediate)
-      : ArgNum(ArgNum), AddressType(AddressType), Immediate(Immediate) {}
+  BuiltinArgResult(unsigned ArgNum, bool AddressType)
+      : ArgNum(ArgNum), AddressType(AddressType) {}
   void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override {
     OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")
        << "(E->getArg(" << ArgNum << "))";
@@ -564,11 +559,6 @@ class BuiltinArgResult : public Result {
       return "(" + varname() + ".getPointer())";
     return Result::asValue();
   }
-  bool hasIntegerValue() const override { return Immediate; }
-  std::string getIntegerValue(const std::string &IntType) override {
-    return "GetIntegerConstantValue<" + IntType + ">(E->getArg(" +
-           utostr(ArgNum) + "), getContext())";
-  }
 };
 
 // Result subclass for an integer literal appearing in Tablegen. This may need
@@ -643,34 +633,27 @@ class IRBuilderResult : public Result {
   StringRef CallPrefix;
   std::vector<Ptr> Args;
   std::set<unsigned> AddressArgs;
-  std::map<unsigned, std::string> IntegerArgs;
+  std::map<unsigned, std::string> IntConstantArgs;
   IRBuilderResult(StringRef CallPrefix, std::vector<Ptr> Args,
                   std::set<unsigned> AddressArgs,
-                  std::map<unsigned, std::string> IntegerArgs)
-      : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs),
-        IntegerArgs(IntegerArgs) {}
+                  std::map<unsigned, std::string> IntConstantArgs)
+    : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs),
+        IntConstantArgs(IntConstantArgs) {}
   void genCode(raw_ostream &OS,
                CodeGenParamAllocator &ParamAlloc) const override {
     OS << CallPrefix;
     const char *Sep = "";
     for (unsigned i = 0, e = Args.size(); i < e; ++i) {
       Ptr Arg = Args[i];
-      auto it = IntegerArgs.find(i);
-
-      OS << Sep;
-      Sep = ", ";
-
-      if (it != IntegerArgs.end()) {
-        if (Arg->hasIntegerConstantValue())
-          OS << "static_cast<" << it->second << ">("
-             << ParamAlloc.allocParam(it->second,
-                                      utostr(Arg->integerConstantValue()))
-             << ")";
-        else if (Arg->hasIntegerValue())
-          OS << ParamAlloc.allocParam(it->second,
-                                      Arg->getIntegerValue(it->second));
+      auto it = IntConstantArgs.find(i);
+      if (it != IntConstantArgs.end()) {
+        assert(Arg->hasIntegerConstantValue());
+        OS << Sep << "static_cast<" << it->second << ">("
+           << ParamAlloc.allocParam("unsigned",
+                                    utostr(Arg->integerConstantValue()))
+           << ")";
       } else {
-        OS << Arg->varname();
+        OS << Sep << Arg->varname();
       }
       Sep = ", ";
     }
@@ -679,8 +662,7 @@ class IRBuilderResult : public Result {
   void morePrerequisites(std::vector<Ptr> &output) const override {
     for (unsigned i = 0, e = Args.size(); i < e; ++i) {
       Ptr Arg = Args[i];
-      if (IntegerArgs.find(i) != IntegerArgs.end() &&
-          Arg->hasIntegerConstantValue())
+      if (IntConstantArgs.find(i) != IntConstantArgs.end())
         continue;
       output.push_back(Arg);
     }
@@ -999,8 +981,8 @@ class MveEmitter {
                             const Type *Param);
   Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum,
                                const Result::Scope &Scope, const Type *Param);
-  Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote,
-                            bool Immediate);
+  Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType,
+                            bool Promote);
 
   // Constructor and top-level functions.
 
@@ -1173,17 +1155,17 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
       Args.push_back(getCodeForDagArg(D, i, Scope, Param));
     if (Op->isSubClassOf("IRBuilderBase")) {
       std::set<unsigned> AddressArgs;
-      std::map<unsigned, std::string> IntegerArgs;
+      std::map<unsigned, std::string> IntConstantArgs;
       for (Record *sp : Op->getValueAsListOfDefs("special_params")) {
         unsigned Index = sp->getValueAsInt("index");
         if (sp->isSubClassOf("IRBuilderAddrParam")) {
           AddressArgs.insert(Index);
         } else if (sp->isSubClassOf("IRBuilderIntParam")) {
-          IntegerArgs[Index] = sp->getValueAsString("type");
+          IntConstantArgs[Index] = sp->getValueAsString("type");
         }
       }
-      return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"),
-                                               Args, AddressArgs, IntegerArgs);
+      return std::make_shared<IRBuilderResult>(
+          Op->getValueAsString("prefix"), Args, AddressArgs, IntConstantArgs);
     } else if (Op->isSubClassOf("IRIntBase")) {
       std::vector<const Type *> ParamTypes;
       for (Record *RParam : Op->getValueAsListOfDefs("params"))
@@ -1233,9 +1215,9 @@ Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
 }
 
 Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType,
-                                      bool Promote, bool Immediate) {
-  Result::Ptr V = std::make_shared<BuiltinArgResult>(
-      ArgNum, isa<PointerType>(ArgType), Immediate);
+                                      bool Promote) {
+  Result::Ptr V =
+      std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType));
 
   if (Promote) {
     if (const auto *ST = dyn_cast<ScalarType>(ArgType)) {
@@ -1309,14 +1291,17 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
     const Type *ArgType = ME.getType(TypeInit, Param);
     ArgTypes.push_back(ArgType);
 
+    // The argument will usually have a name in the arguments dag, which goes
+    // into the variable-name scope that the code gen will refer to.
+    StringRef ArgName = ArgsDag->getArgNameStr(i);
+    if (!ArgName.empty())
+      Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote);
+
     // If the argument is a subclass of Immediate, record the details about
     // what values it can take, for Sema checking.
-    bool Immediate = false;
     if (auto TypeDI = dyn_cast<DefInit>(TypeInit)) {
       Record *TypeRec = TypeDI->getDef();
       if (TypeRec->isSubClassOf("Immediate")) {
-        Immediate = true;
-
         Record *Bounds = TypeRec->getValueAsDef("bounds");
         ImmediateArg &IA = ImmediateArgs[i];
         if (Bounds->isSubClassOf("IB_ConstRange")) {
@@ -1330,7 +1315,7 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
           IA.i1 = 0;
           IA.i2 = 128 / Param->sizeInBits() - 1;
-        } else if (Bounds->isSubClassOf("IB_EltBit")) {
+        } else if (Bounds->getName() == "IB_EltBit") {
           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
           IA.i1 = Bounds->getValueAsInt("base");
           IA.i2 = IA.i1 + Param->sizeInBits() - 1;
@@ -1347,12 +1332,6 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
         }
       }
     }
-
-    // The argument will usually have a name in the arguments dag, which goes
-    // into the variable-name scope that the code gen will refer to.
-    StringRef ArgName = ArgsDag->getArgNameStr(i);
-    if (!ArgName.empty())
-      Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote, Immediate);
   }
 
   // Finally, go through the codegen dag and translate it into a Result object

diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index d20540480a82..6e63022d4cf5 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -913,14 +913,6 @@ defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
    [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
    llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;
 
-def int_arm_mve_shl_imm_predicated: Intrinsic<[llvm_anyvector_ty],
-   [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
-   [IntrNoMem]>;
-def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty],
-   [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag
-    llvm_anyvector_ty, LLVMMatchType<0>],
-   [IntrNoMem]>;
-
 // MVE scalar shifts.
 class ARM_MVE_qrshift_single<list<LLVMType> value,
                              list<LLVMType> saturate = []> :

diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 2f56d183e11a..d351ae8905b6 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2816,39 +2816,27 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
   let Inst{21} = 0b1;
 }
 
-multiclass MVE_immediate_shift_patterns_inner<
-    MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
-    Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
-
-  def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
-            (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
-
-  def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
-                          !dag(pred_int, unsignedFlag, ?),
-                          (pred_int (VTI.Pred VCCR:$mask),
-                                   (VTI.Vec MQPR:$inactive)))),
-            (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
-                           ARMVCCThen, (VTI.Pred VCCR:$mask),
-                           (VTI.Vec MQPR:$inactive)))>;
-}
-
-multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
-                                        Operand imm_operand_type> {
-  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
-      ARMvshlImm, int_arm_mve_shl_imm_predicated,
-      !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
-  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
-      ARMvshruImm, int_arm_mve_shr_imm_predicated,
-      !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
-  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
-      ARMvshrsImm, int_arm_mve_shr_imm_predicated,
-      !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
-}
-
 let Predicates = [HasMVEInt] in {
-  defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
-  defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
-  defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
+  def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+  def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+  def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
 }
 
 // end of mve_shift instructions

diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
deleted file mode 100644
index 86228ef94b38..000000000000
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
+++ /dev/null
@@ -1,398 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vshlq_n_s8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshl.i8 q0, q0, #5
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
-  ret <16 x i8> %0
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vshlq_n_s16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshl.i16 q0, q0, #5
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
-  ret <8 x i16> %0
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vshlq_n_s32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshl.i32 q0, q0, #18
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = shl <4 x i32> %a, <i32 18, i32 18, i32 18, i32 18>
-  ret <4 x i32> %0
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vshrq_n_s8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.s8 q0, q0, #4
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
-  ret <16 x i8> %0
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vshrq_n_s16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.s16 q0, q0, #10
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = ashr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
-  ret <8 x i16> %0
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vshrq_n_s32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.s32 q0, q0, #19
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = ashr <4 x i32> %a, <i32 19, i32 19, i32 19, i32 19>
-  ret <4 x i32> %0
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_vshrq_n_u8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.u8 q0, q0, #1
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  ret <16 x i8> %0
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_vshrq_n_u16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.u16 q0, q0, #10
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = lshr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
-  ret <8 x i16> %0
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vshrq_n_u32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.u32 q0, q0, #10
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = lshr <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
-  ret <4 x i32> %0
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_m_n_s8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i8 q0, q1, #6
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
-  %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %1, <16 x i8> %inactive)
-  ret <16 x i8> %2
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_m_n_s16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i16 q0, q1, #13
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
-  %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %1, <8 x i16> %inactive)
-  ret <8 x i16> %2
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_m_n_s32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i32 q0, q1, #0
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
-  ret <4 x i32> %2
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_m_n_s8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.s8 q0, q1, #2
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
-  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
-  ret <16 x i8> %2
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_m_n_s16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.s16 q0, q1, #3
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
-  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %1, <8 x i16> %inactive)
-  ret <8 x i16> %2
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_m_n_s32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.s32 q0, q1, #13
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %1, <4 x i32> %inactive)
-  ret <4 x i32> %2
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_m_n_u8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.u8 q0, q1, #4
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
-  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
-  ret <16 x i8> %2
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_m_n_u16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.u16 q0, q1, #14
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
-  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %1, <8 x i16> %inactive)
-  ret <8 x i16> %2
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_m_n_u32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.u32 q0, q1, #21
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %1, <4 x i32> %inactive)
-  ret <4 x i32> %2
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_x_n_s8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i8 q0, q0, #1
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
-  %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1, <16 x i8> undef)
-  ret <16 x i8> %2
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_x_n_s16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i16 q0, q0, #15
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
-  %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %1, <8 x i16> undef)
-  ret <8 x i16> %2
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_x_n_s32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i32 q0, q0, #13
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %1, <4 x i32> undef)
-  ret <4 x i32> %2
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_x_n_u8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i8 q0, q0, #4
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
-  %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %1, <16 x i8> undef)
-  ret <16 x i8> %2
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_x_n_u16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i16 q0, q0, #10
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
-  %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %1, <8 x i16> undef)
-  ret <8 x i16> %2
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshlq_x_n_u32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshlt.i32 q0, q0, #30
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %1, <4 x i32> undef)
-  ret <4 x i32> %2
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_x_n_s8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.s8 q0, q0, #4
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
-  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %1, <16 x i8> undef)
-  ret <16 x i8> %2
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_x_n_s16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.s16 q0, q0, #10
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
-  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %1, <8 x i16> undef)
-  ret <8 x i16> %2
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_x_n_s32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.s32 q0, q0, #7
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %1, <4 x i32> undef)
-  ret <4 x i32> %2
-}
-
-define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_x_n_u8:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.u8 q0, q0, #7
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
-  %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> undef)
-  ret <16 x i8> %2
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_x_n_u16:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.u16 q0, q0, #7
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
-  %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %1, <8 x i16> undef)
-  ret <8 x i16> %2
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
-; CHECK-LABEL: test_vshrq_x_n_u32:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmsr p0, r0
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vshrt.u32 q0, q0, #6
-; CHECK-NEXT:    bx lr
-entry:
-  %0 = zext i16 %p to i32
-  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
-  %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
-  ret <4 x i32> %2
-}
-
-declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
-declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
-declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
-
-declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
-declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
-declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
-
-declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
-declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
-declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)


        


More information about the llvm-commits mailing list