[llvm] 4321c6a - [ARM, MVE] Support immediate vbicq, vorrq, vmvnq intrinsics.

Simon Tatham via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 23 03:54:13 PST 2020


Author: Simon Tatham
Date: 2020-01-23T11:53:52Z
New Revision: 4321c6af28e9cc26d385fc388b8f0a74b32741c1

URL: https://github.com/llvm/llvm-project/commit/4321c6af28e9cc26d385fc388b8f0a74b32741c1
DIFF: https://github.com/llvm/llvm-project/commit/4321c6af28e9cc26d385fc388b8f0a74b32741c1.diff

LOG: [ARM,MVE] Support immediate vbicq,vorrq,vmvnq intrinsics.

Summary:
Immediate vmvnq is code-generated as a simple vector constant in IR,
and left to the backend to recognize that it can be created with an
MVE VMVN instruction. The predicated version is represented as a
select between the input and the same constant, and I've added a
Tablegen isel rule to turn that into a predicated VMVN. (That should
be better than the previous VMVN + VPSEL: it's the same number of
instructions but now it can fold into an adjacent VPT block.)
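For illustration, here is a minimal sketch of that shape at the source
and IR level, mirroring the new CodeGen test below (the demo_* name is
mine, not part of the commit):

    #include <arm_mve.h>

    int16x8_t demo_vmvnq_m(int16x8_t inactive, mve_pred16_t p)
    {
        // clang inverts 0xf00 at compile time, so the IR is just
        //   select <8 x i1> %p, <8 x i16> <i16 -3841, ...>, %inactive
        // and the new isel rule selects that as a predicated
        //   vmvnt.i16 q0, #0xf00
        return vmvnq_m_n_s16(inactive, 0xf00, p);
    }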

The unpredicated forms of VBIC and VORR are done by enabling the same
isel lowering as for NEON, recognizing appropriate immediates and
rewriting them as ARMISD::VBICIMM / ARMISD::VORRIMM SDNodes, which I
then instruction-select into the right MVE instructions (now that I've
also reworked those instructions to use the same MC operand encoding).
In order to do that, I had to promote the Tablegen SDNode instance
`NEONvorrImm` to a general `ARMvorrImm` available in MVE as well, and
similarly for `NEONvbicImm`.
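Concretely, a sketch of the unpredicated path (based on the new tests;
the demo_* wrapper is illustrative only):

    #include <arm_mve.h>

    uint16x8_t demo_vorrq(uint16x8_t a)
    {
        // IR: or <8 x i16> %a, <i16 -4096, ...>   (a splat of 0xf000)
        // Lowering rewrites the splatted OR as ARMISD::VORRIMM, which
        // is then selected as: vorr.i16 q0, #0xf000
        return vorrq_n_u16(a, 0xf000);
    }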

The predicated forms of VBIC and VORR are represented as a vector
select between the original input vector and the output of the
unpredicated operation. The main convenience of this is that it still
lets me use the existing isel lowering for VBICIMM/VORRIMM, and not
have to write another copy of the operand encoding translation code.
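Again as a sketch (the predicate simply wraps the same unpredicated
node; the demo_* name is mine):

    #include <arm_mve.h>

    uint16x8_t demo_vbicq_m(uint16x8_t a, mve_pred16_t p)
    {
        // IR: %t = and %a, <i16 -37, ...>  (i.e. ~0x24), then
        //     select %p, %t, %a
        // The select around the VBICIMM is matched by the new Tablegen
        // pattern as: vbict.i16 q0, #0x24
        return vbicq_m_n_u16(a, 0x24, p);
    }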

This intrinsic family is the first to use the `imm_simd` system I put
into the MveEmitter tablegen backend. So, naturally, it showed up a
bug or two (emitting bogus range checks and the like). Fixed those,
and added a full set of tests for the permissible immediates in the
existing Sema test.
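For example, the Sema test now checks immediates along these lines:

    void demo_range_check(uint16x8_t h)
    {
        h = vbicq(h, 0xFF00); // OK: byte 0xFF shifted left by 8 bits
        h = vbicq(h, 0x0101); // error: argument should be an 8-bit
                              // value shifted by a multiple of 8 bits
    }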

Also adjusted the isel pattern for `vmovlb.u8`, which stopped matching
because lowering started turning its input into a VBICIMM. Now it
recognizes the VBICIMM instead.
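A sketch of the affected case (this is my assumption about how the
and-with-0xFF splat arises from user code; the demo_* wrapper is
hypothetical):

    #include <arm_mve.h>

    uint16x8_t demo_zext_inreg(uint16x8_t v)
    {
        // 'and' with a splat of 0x00FF used to match the isel pattern
        // directly; lowering now turns it into a VBICIMM that clears
        // the top byte of each lane, and the updated pattern still
        // selects it as: vmovlb.u8 q0, q0
        return vandq_u16(v, vdupq_n_u16(0x00FF));
    }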

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72934

Added: 
    clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
    llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll

Modified: 
    clang/include/clang/Basic/arm_mve.td
    clang/include/clang/Basic/arm_mve_defs.td
    clang/include/clang/Sema/Sema.h
    clang/lib/Sema/SemaChecking.cpp
    clang/test/Sema/arm-mve-immediates.c
    clang/utils/TableGen/MveEmitter.cpp
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/lib/Target/ARM/ARMInstrInfo.td
    llvm/lib/Target/ARM/ARMInstrMVE.td
    llvm/lib/Target/ARM/ARMInstrNEON.td

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index d28f97390614..ee0ce25bf516 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -116,6 +116,28 @@ def vmulqf: Intrinsic<Vector, (args Vector:$a, Vector:$b), (fmul $a, $b)>,
             NameOverride<"vmulq">;
 }
 
+let params = !listconcat(T.Int16, T.Int32) in {
+  let pnt = PNT_None in {
+    def vmvnq_n: Intrinsic<Vector, (args imm_simd_vmvn:$imm),
+                           (not (splat (Scalar $imm)))>;
+  }
+  defm vmvnq: IntrinsicMX<Vector, (args imm_simd_vmvn:$imm, Predicate:$pred),
+                     (select $pred, (not (splat (Scalar $imm))), $inactive),
+                     1, "_n", PNT_NType, PNT_None>;
+  let pnt = PNT_NType in {
+    def vbicq_n: Intrinsic<Vector, (args Vector:$v, imm_simd_restrictive:$imm),
+                           (and $v, (not (splat (Scalar $imm))))>;
+    def vorrq_n: Intrinsic<Vector, (args Vector:$v, imm_simd_restrictive:$imm),
+                           (or $v, (splat (Scalar $imm)))>;
+  }
+  def vbicq_m_n: Intrinsic<
+    Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred),
+    (select $pred, (and $v, (not (splat (Scalar $imm)))), $v)>;
+  def vorrq_m_n: Intrinsic<
+    Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred),
+    (select $pred, (or $v, (splat (Scalar $imm))), $v)>;
+}
+
 // The bitcasting below is not overcomplicating the IR because while
 // Vector and UVector may be different vector types at the C level i.e.
 // vectors of same size signed/unsigned ints. Once they're lowered

diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index a9afddb57968..0f3c08cb9c53 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -319,6 +319,7 @@ class IB_EltBit<int base_, Type type_ = Scalar> : ImmediateBounds {
   int base = base_;
   Type type = type_;
 }
+def IB_ExtraArg_LaneSize;
 
 // -----------------------------------------------------------------------------
 // End-user definitions for immediate arguments.
@@ -327,11 +328,13 @@ class IB_EltBit<int base_, Type type_ = Scalar> : ImmediateBounds {
 // intrinsics like vmvnq or vorrq. imm_simd_restrictive has to be an 8-bit
 // value shifted left by a whole number of bytes; imm_simd_vmvn can also be of
 // the form 0xXXFF for some byte value XX.
-def imm_simd_restrictive : Immediate<u32, IB_UEltValue> {
+def imm_simd_restrictive : Immediate<Scalar, IB_UEltValue> {
   let extra = "ShiftedByte";
+  let extraarg = "!lanesize";
 }
-def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
+def imm_simd_vmvn : Immediate<Scalar, IB_UEltValue> {
   let extra = "ShiftedByteOrXXFF";
+  let extraarg = "!lanesize";
 }
 
 // imm_1toN can take any value from 1 to N inclusive, where N is the number of
@@ -457,26 +460,31 @@ class NameOverride<string basename_> {
 
 // A wrapper to define both _m and _x versions of a predicated
 // intrinsic.
+//
+// We provide optional parameters to override the polymorphic name
+// types separately for the _m and _x variants, because sometimes they
+// polymorph differently (typically because the type of the inactive
+// parameter can be used as a disambiguator if it's present).
 multiclass IntrinsicMX<Type rettype, dag arguments, dag cg,
                        int wantXVariant = 1,
                        string nameSuffix = "",
+                       PolymorphicNameType pnt_m = PNT_Type,
                        PolymorphicNameType pnt_x = PNT_Type> {
   // The _m variant takes an initial parameter called $inactive, which
   // provides the input value of the output register, i.e. all the
   // inactive lanes in the predicated operation take their values from
   // this.
   def "_m" # nameSuffix:
-     Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg>;
+     Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg> {
+    let pnt = pnt_m;
+  }
 
   foreach unusedVar = !if(!eq(wantXVariant, 1), [1], []<int>) in {
     // The _x variant leaves off that parameter, and simply uses an
     // undef value of the same type.
+
     def "_x" # nameSuffix:
-       Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
-      // Allow overriding of the polymorphic name type, because
-      // sometimes the _m and _x variants polymorph differently
-      // (typically because the type of the inactive parameter can be
-      // used as a disambiguator if it's present).
+      Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
       let pnt = pnt_x;
     }
   }

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 08f657374bcf..40f6acd7d9e2 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11670,8 +11670,10 @@ class Sema final {
   bool SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum,
                                       unsigned Multiple);
   bool SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum);
-  bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum);
-  bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum);
+  bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum,
+                                         unsigned ArgBits);
+  bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum,
+                                               unsigned ArgBits);
   bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
                                 int ArgNum, unsigned ExpectedFieldNum,
                                 bool AllowName);

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e826dfad5b4a..1f361569e09d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5592,7 +5592,8 @@ static bool IsShiftedByte(llvm::APSInt Value) {
 /// SemaBuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is
 /// a constant expression representing an arbitrary byte value shifted left by
 /// a multiple of 8 bits.
-bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) {
+bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum,
+                                             unsigned ArgBits) {
   llvm::APSInt Result;
 
   // We can't check the value of a dependent argument.
@@ -5604,6 +5605,10 @@ bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) {
   if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
     return true;
 
+  // Truncate to the given size.
+  Result = Result.getLoBits(ArgBits);
+  Result.setIsUnsigned(true);
+
   if (IsShiftedByte(Result))
     return false;
 
@@ -5617,7 +5622,8 @@ bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) {
 /// 0x00FF, 0x01FF, ..., 0xFFFF). This strange range check is needed for some
 /// Arm MVE intrinsics.
 bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall,
-                                                   int ArgNum) {
+                                                   int ArgNum,
+                                                   unsigned ArgBits) {
   llvm::APSInt Result;
 
   // We can't check the value of a dependent argument.
@@ -5629,6 +5635,10 @@ bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall,
   if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
     return true;
 
+  // Truncate to the given size.
+  Result = Result.getLoBits(ArgBits);
+  Result.setIsUnsigned(true);
+
   // Check to see if it's in either of the required forms.
   if (IsShiftedByte(Result) ||
       (Result > 0 && Result < 0x10000 && (Result & 0xFF) == 0xFF))

diff --git a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
new file mode 100644
index 000000000000..de1bdee41eb3
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
@@ -0,0 +1,402 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vbicq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vbicq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xd500);
+#else /* POLYMORPHIC */
+    return vbicq_n_s16(a, 0xd500);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -252, i32 -252, i32 -252, i32 -252>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vbicq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xfb);
+#else /* POLYMORPHIC */
+    return vbicq_n_s32(a, 0xfb);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vbicq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xf2);
+#else /* POLYMORPHIC */
+    return vbicq_n_u16(a, 0xf2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -8193, i32 -8193, i32 -8193, i32 -8193>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vbicq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0x2000);
+#else /* POLYMORPHIC */
+    return vbicq_n_u32(a, 0x2000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vorrq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0xc3);
+#else /* POLYMORPHIC */
+    return vorrq_n_s16(a, 0xc3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 65536, i32 65536, i32 65536, i32 65536>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vorrq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0x10000);
+#else /* POLYMORPHIC */
+    return vorrq_n_s32(a, 0x10000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vorrq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0xf000);
+#else /* POLYMORPHIC */
+    return vorrq_n_u16(a, 0xf000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 8978432, i32 8978432, i32 8978432, i32 8978432>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vorrq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0x890000);
+#else /* POLYMORPHIC */
+    return vorrq_n_u32(a, 0x890000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> <i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391>
+//
+int16x8_t test_vmvnq_n_s16()
+{
+    return vmvnq_n_s16(0x9500);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 -5570561, i32 -5570561, i32 -5570561, i32 -5570561>
+//
+int32x4_t test_vmvnq_n_s32()
+{
+    return vmvnq_n_s32(0x550000);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> <i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689>
+//
+uint16x8_t test_vmvnq_n_u16()
+{
+    return vmvnq_n_u16(0x4900);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 1023410175, i32 1023410175, i32 1023410175, i32 1023410175>
+//
+uint32x4_t test_vmvnq_n_u32()
+{
+    return vmvnq_n_u32(0xc3000000);
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vbicq_m_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x2c00, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_s16(a, 0x2c00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -13893633, i32 -13893633, i32 -13893633, i32 -13893633>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vbicq_m_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0xd40000, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_s32(a, 0xd40000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+uint16x8_t test_vbicq_m_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x24, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_u16(a, 0x24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1644167169, i32 -1644167169, i32 -1644167169, i32 -1644167169>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+uint32x4_t test_vbicq_m_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x62000000, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_u32(a, 0x62000000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vorrq_m_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x3500, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_s16(a, 0x3500, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 654311424, i32 654311424, i32 654311424, i32 654311424>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vorrq_m_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x27000000, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_s32(a, 0x27000000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+uint16x8_t test_vorrq_m_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0xaf, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_u16(a, 0xaf, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 89, i32 89, i32 89, i32 89>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+uint32x4_t test_vorrq_m_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x59, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_u32(a, 0x59, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xf00, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_s16(inactive, 0xf00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -18945, i32 -18945, i32 -18945, i32 -18945>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0x4a00, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_s32(inactive, 0x4a00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xa500, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_u16(inactive, 0xa500, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -63489, i32 -63489, i32 -63489, i32 -63489>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xf800, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_u32(inactive, 0xf800, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767>, <8 x i16> undef
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p)
+{
+    return vmvnq_x_n_s16(0xfd00, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -12189697, i32 -12189697, i32 -12189697, i32 -12189697>, <4 x i32> undef
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p)
+{
+    return vmvnq_x_n_s32(0xba0000, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505>, <8 x i16> undef
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p)
+{
+    return vmvnq_x_n_u16(0x5400, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -4865, i32 -4865, i32 -4865, i32 -4865>, <4 x i32> undef
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_x_n_u32(mve_pred16_t p)
+{
+    return vmvnq_x_n_u32(0x1300, p);
+}
+

diff --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c
index b8106fbb7028..7d404d74ab51 100644
--- a/clang/test/Sema/arm-mve-immediates.c
+++ b/clang/test/Sema/arm-mve-immediates.c
@@ -203,3 +203,73 @@ void test_immediate_shifts(uint8x16_t vb, uint16x8_t vh, uint32x4_t vw)
   vsriq(vw, vw, 0); // expected-error {{argument value 0 is outside the valid range [1, 32]}}
   vsriq(vw, vw, 33); // expected-error {{argument value 33 is outside the valid range [1, 32]}}
 }
+
+void test_simd_bic_orr(int16x8_t h, int32x4_t w)
+{
+    h = vbicq(h, 0x0000);
+    h = vbicq(h, 0x0001);
+    h = vbicq(h, 0x00FF);
+    h = vbicq(h, 0x0100);
+    h = vbicq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+    h = vbicq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+    h = vbicq(h, 0xFF00);
+
+    w = vbicq(w, 0x00000000);
+    w = vbicq(w, 0x00000001);
+    w = vbicq(w, 0x000000FF);
+    w = vbicq(w, 0x00000100);
+    w = vbicq(w, 0x0000FF00);
+    w = vbicq(w, 0x00010000);
+    w = vbicq(w, 0x00FF0000);
+    w = vbicq(w, 0x01000000);
+    w = vbicq(w, 0xFF000000);
+    w = vbicq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+    w = vbicq(w, 0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+
+    h = vorrq(h, 0x0000);
+    h = vorrq(h, 0x0001);
+    h = vorrq(h, 0x00FF);
+    h = vorrq(h, 0x0100);
+    h = vorrq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+    h = vorrq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+    h = vorrq(h, 0xFF00);
+
+    w = vorrq(w, 0x00000000);
+    w = vorrq(w, 0x00000001);
+    w = vorrq(w, 0x000000FF);
+    w = vorrq(w, 0x00000100);
+    w = vorrq(w, 0x0000FF00);
+    w = vorrq(w, 0x00010000);
+    w = vorrq(w, 0x00FF0000);
+    w = vorrq(w, 0x01000000);
+    w = vorrq(w, 0xFF000000);
+    w = vorrq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+    w = vorrq(w, 0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+}
+
+void test_simd_vmvn(void)
+{
+    uint16x8_t h;
+    h = vmvnq_n_u16(0x0000);
+    h = vmvnq_n_u16(0x0001);
+    h = vmvnq_n_u16(0x00FF);
+    h = vmvnq_n_u16(0x0100);
+    h = vmvnq_n_u16(0x0101); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+    h = vmvnq_n_u16(0x01FF);
+    h = vmvnq_n_u16(0xFF00);
+
+    uint32x4_t w;
+    w = vmvnq_n_u32(0x00000000);
+    w = vmvnq_n_u32(0x00000001);
+    w = vmvnq_n_u32(0x000000FF);
+    w = vmvnq_n_u32(0x00000100);
+    w = vmvnq_n_u32(0x0000FF00);
+    w = vmvnq_n_u32(0x00010000);
+    w = vmvnq_n_u32(0x00FF0000);
+    w = vmvnq_n_u32(0x01000000);
+    w = vmvnq_n_u32(0xFF000000);
+    w = vmvnq_n_u32(0x01000001); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+    w = vmvnq_n_u32(0x01FFFFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+    w = vmvnq_n_u32(0x0001FFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+    w = vmvnq_n_u32(0x000001FF);
+}

diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index 165c757d9a84..a27481d946c4 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -883,38 +883,41 @@ class ACLEIntrinsic {
         break;
       case ImmediateArg::BoundsType::UInt:
         lo = 0;
-        hi = IA.i1;
+        hi = llvm::APInt::getMaxValue(IA.i1).zext(128);
         break;
       }
 
-      llvm::APInt typelo, typehi;
-      unsigned Bits = IA.ArgType->sizeInBits();
-      if (cast<ScalarType>(IA.ArgType)->kind() == ScalarTypeKind::SignedInt) {
-        typelo = llvm::APInt::getSignedMinValue(Bits).sext(128);
-        typehi = llvm::APInt::getSignedMaxValue(Bits).sext(128);
-      } else {
-        typelo = llvm::APInt::getMinValue(Bits).zext(128);
-        typehi = llvm::APInt::getMaxValue(Bits).zext(128);
-      }
-
       std::string Index = utostr(kv.first);
 
-      if (lo.sle(typelo) && hi.sge(typehi))
-        SemaChecks.push_back("SemaBuiltinConstantArg(TheCall, " + Index + ")");
-      else
+      // Emit a range check if the legal range of values for the
+      // immediate is smaller than the _possible_ range of values for
+      // its type.
+      unsigned ArgTypeBits = IA.ArgType->sizeInBits();
+      llvm::APInt ArgTypeRange = llvm::APInt::getMaxValue(ArgTypeBits).zext(128);
+      llvm::APInt ActualRange = (hi-lo).trunc(64).sext(128);
+      if (ActualRange.ult(ArgTypeRange))
         SemaChecks.push_back("SemaBuiltinConstantArgRange(TheCall, " + Index +
                              ", " + signedHexLiteral(lo) + ", " +
                              signedHexLiteral(hi) + ")");
 
       if (!IA.ExtraCheckType.empty()) {
         std::string Suffix;
-        if (!IA.ExtraCheckArgs.empty())
-          Suffix = (Twine(", ") + IA.ExtraCheckArgs).str();
+        if (!IA.ExtraCheckArgs.empty()) {
+          std::string tmp;
+          StringRef Arg = IA.ExtraCheckArgs;
+          if (Arg == "!lanesize") {
+            tmp = utostr(IA.ArgType->sizeInBits());
+            Arg = tmp;
+          }
+          Suffix = (Twine(", ") + Arg).str();
+        }
         SemaChecks.push_back((Twine("SemaBuiltinConstantArg") +
                               IA.ExtraCheckType + "(TheCall, " + Index +
                               Suffix + ")")
                                  .str());
       }
+
+      assert(!SemaChecks.empty());
     }
     if (SemaChecks.empty())
       return "";

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5789f7f5ac61..b6e631edbef6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -12176,7 +12176,7 @@ static SDValue PerformANDCombine(SDNode *N,
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN && Subtarget->hasNEON() &&
+  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VbicVT;
@@ -12483,7 +12483,7 @@ static SDValue PerformORCombine(SDNode *N,
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN && Subtarget->hasNEON() &&
+  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VorrVT;

diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index ce67af6f1b49..c9fc8333da84 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -274,6 +274,10 @@ def ARMvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
 def ARMvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
 def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
 
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                           SDTCisVT<2, i32>]>;
+def ARMvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def ARMvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
 
 def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;

diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 2ef8f6f7ce35..c27ea47e9bbe 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1367,37 +1367,51 @@ class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
   let Inst{3-0} = imm{3-0};
 }
 
-class MVE_VORR<string suffix, bit hw, Operand imm_type>
-  : MVE_bit_cmode<"vorr", suffix, hw, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b0;
-  let validForTailPredication = 1;
+multiclass MVE_bit_cmode_p<string iname, bit opcode,
+                           MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
+  def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0},
+                         (ins MQPR:$Qd_src, imm_type:$imm)> {
+    let Inst{5} = opcode;
+    let validForTailPredication = 1;
+  }
+
+  defvar Inst = !cast<Instruction>(NAME);
+  defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm));
+
+  let Predicates = [HasMVEInt] in {
+    def : Pat<UnpredPat, (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
+    def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                          UnpredPat, (VTI.Vec MQPR:$src))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
+                             ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+  }
+}
+
+multiclass MVE_VORRimm<MVEVectorVTInfo VTI, Operand imm_type> {
+  defm "": MVE_bit_cmode_p<"vorr", 0, VTI, imm_type, ARMvorrImm>;
+}
+multiclass MVE_VBICimm<MVEVectorVTInfo VTI, Operand imm_type> {
+  defm "": MVE_bit_cmode_p<"vbic", 1, VTI, imm_type, ARMvbicImm>;
 }
 
-def MVE_VORRimmi16 : MVE_VORR<"i16", 1, nImmSplatI16>;
-def MVE_VORRimmi32 : MVE_VORR<"i32", 0, nImmSplatI32>;
+defm MVE_VORRimmi16 : MVE_VORRimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VORRimmi32 : MVE_VORRimm<MVE_v4i32, nImmSplatI32>;
+defm MVE_VBICimmi16 : MVE_VBICimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VBICimmi32 : MVE_VBICimm<MVE_v4i32, nImmSplatI32>;
 
 def MVE_VORNimmi16 : MVEInstAlias<"vorn${vp}.i16\t$Qd, $imm",
     (MVE_VORRimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
 def MVE_VORNimmi32 : MVEInstAlias<"vorn${vp}.i32\t$Qd, $imm",
     (MVE_VORRimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
 
-def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
-    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
-
-class MVE_VBIC<string suffix, bit hw, Operand imm_type>
-  : MVE_bit_cmode<"vbic", suffix, hw, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b1;
-  let validForTailPredication = 1;
-}
-
-def MVE_VBICimmi16 : MVE_VBIC<"i16", 1, nImmSplatI16>;
-def MVE_VBICimmi32 : MVE_VBIC<"i32", 0, nImmSplatI32>;
-
 def MVE_VANDimmi16 : MVEInstAlias<"vand${vp}.i16\t$Qd, $imm",
     (MVE_VBICimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
 def MVE_VANDimmi32 : MVEInstAlias<"vand${vp}.i32\t$Qd, $imm",
     (MVE_VBICimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
 
+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
+    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
+
 class MVE_VMOV_lane_direction {
   bit bit_20;
   dag oops;
@@ -2206,6 +2220,15 @@ let Predicates = [HasMVEInt] in {
 
   def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
             (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+
+  def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+                            MQPR:$inactive)),
+            (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
+                            ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+  def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+                            MQPR:$inactive)),
+            (v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm,
+                            ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
 }
 
 class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
@@ -2341,7 +2364,7 @@ let Predicates = [HasMVEInt] in {
   def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
             (MVE_VMOVLu16bh MQPR:$src)>;
   // zext_inreg 8 -> 16
-  def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
+  def : Pat<(ARMvbicImm (v8i16 MQPR:$src), (i32 0xAFF)),
             (MVE_VMOVLu8bh MQPR:$src)>;
 }
 

diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 6244d8d9e27e..a20acd8c3919 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -509,11 +509,6 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
 def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
 def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
 
-def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
-                                           SDTCisVT<2, i32>]>;
-def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
-def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
-
 def NEONvbsl      : SDNode<"ARMISD::VBSL",
                            SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                 SDTCisSameAs<0, 1>,
@@ -5296,7 +5291,7 @@ def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                           IIC_VMOVImm,
                           "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+                            (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5305,7 +5300,7 @@ def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                           IIC_VMOVImm,
                           "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+                            (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
 
@@ -5314,7 +5309,7 @@ def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                           IIC_VMOVImm,
                           "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+                            (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5323,7 +5318,7 @@ def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                           IIC_VMOVImm,
                           "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+                            (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
 
@@ -5347,7 +5342,7 @@ def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                           IIC_VMOVImm,
                           "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+                            (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5356,7 +5351,7 @@ def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                           IIC_VMOVImm,
                           "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+                            (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
 
@@ -5365,7 +5360,7 @@ def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                           IIC_VMOVImm,
                           "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+                            (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5374,7 +5369,7 @@ def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                           IIC_VMOVImm,
                           "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+                            (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
 

diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll
new file mode 100644
index 000000000000..54bbb0d48643
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll
@@ -0,0 +1,365 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh0(<8 x i16> %a) {
+; CHECK-LABEL: test_vbicq_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh8(<8 x i16> %a) {
+; CHECK-LABEL: test_vbicq_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh0(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh8(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh16(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh24(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
+  ret <4 x i32> %0
+}
+
+; The immediate in this case is legal for a VMVN but not for a VBIC,
+; so in this case we expect to see the constant being prepared in
+; another register.
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_illegal(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_illegal:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i32 q1, #0x54ff
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -21760, i32 -21760, i32 -21760, i32 -21760>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh0(<8 x i16> %a) {
+; CHECK-LABEL: test_vorrq_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh8(<8 x i16> %a) {
+; CHECK-LABEL: test_vorrq_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh0(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh8(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh16(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh24(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_n_u16() {
+; CHECK-LABEL: test_vmvnq_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i16 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  ret <8 x i16> <i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521>
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_n_u32() {
+; CHECK-LABEL: test_vmvnq_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i32 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  ret <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_n_u16(<8 x i16> %inactive, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt.i16 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = select <8 x i1> %1, <8 x i16> <i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521>, <8 x i16> %inactive
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_n_u32(<4 x i32> %inactive, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt.i32 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = select <4 x i1> %1, <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>, <4 x i32> %inactive
+  ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)


        

