[llvm] 2d728bb - [CodeGen][SelectionDAG] Add new intrinsic experimental.vector.reverse

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 15 05:41:41 PST 2021


Author: Caroline Concatto
Date: 2021-02-15T13:39:43Z
New Revision: 2d728bbff5c688284b8b8306ecfd3000b0ab8bb1

URL: https://github.com/llvm/llvm-project/commit/2d728bbff5c688284b8b8306ecfd3000b0ab8bb1
DIFF: https://github.com/llvm/llvm-project/commit/2d728bbff5c688284b8b8306ecfd3000b0ab8bb1.diff

LOG: [CodeGen][SelectionDAG] Add new intrinsic experimental.vector.reverse

This patch adds a new intrinsic, experimental.vector.reverse, that takes a single
vector and returns a vector of matching type but with the original lane order
reversed. For example:

```
vector.reverse(<A,B,C,D>) ==> <D,C,B,A>
```
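The lane-reversal semantics above can be sketched in plain Python (an illustrative model, not LLVM code; the helper name is made up):

```python
def vector_reverse(vec):
    # result[i] = vec[len(vec) - 1 - i], matching the VECTOR_REVERSE
    # description in ISDOpcodes.h.
    return [vec[len(vec) - 1 - i] for i in range(len(vec))]

print(vector_reverse(["A", "B", "C", "D"]))  # ['D', 'C', 'B', 'A']
```

Note that the operation is an involution, which is what the InstructionSimplify change in this patch exploits: reverse(reverse(x)) folds to x.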

The new intrinsic supports both fixed and scalable vector types.
For fixed-width vectors it lowers to a shufflevector to maintain existing
behaviour; for scalable vectors it uses the new ISD node VECTOR_REVERSE.
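For the fixed-width path, visitVectorReverse in SelectionDAGBuilder builds a shuffle mask whose lane i is NumElts - 1 - i. A minimal Python sketch of that mask construction (the helper name is hypothetical):

```python
def reverse_shuffle_mask(num_elts):
    # Mirrors the loop in visitVectorReverse:
    #   for i in 0..NumElts-1: Mask.push_back(NumElts - 1 - i)
    return [num_elts - 1 - i for i in range(num_elts)]

print(reverse_shuffle_mask(4))  # [3, 2, 1, 0]
```

This mask cannot be formed for scalable vectors, since the element count is unknown at compile time; that is why the scalable path needs a dedicated ISD node.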

This new intrinsic is one of the named shufflevector intrinsics proposed in
the RFC on the llvm-dev mailing list [1].

Patch by Paul Walker (@paulwalker-arm).

[1] https://lists.llvm.org/pipermail/llvm-dev/2020-November/146864.html

Differential Revision: https://reviews.llvm.org/D94883

Added: 
    llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
    llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
    llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll
    llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll

Modified: 
    llvm/docs/LangRef.rst
    llvm/include/llvm/CodeGen/ISDOpcodes.h
    llvm/include/llvm/IR/Intrinsics.td
    llvm/include/llvm/Target/TargetSelectionDAG.td
    llvm/lib/Analysis/InstructionSimplify.cpp
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
    llvm/lib/Target/AArch64/AArch64FastISel.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Removed: 
    


################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7918e5fd6e4f..133edf8ccf5e 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16233,6 +16233,33 @@ runtime, then the result vector is undefined. The ``idx`` parameter must be a
 vector index constant type (for most targets this will be an integer pointer
 type).
 
+'``llvm.experimental.vector.reverse``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
+      declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vector.reverse.*``' intrinsics reverse a vector.
+The intrinsic takes a single vector and returns a vector of matching type but
+with the original lane order reversed. These intrinsics work for both fixed
+and scalable vectors. While this intrinsic is marked as experimental the
+recommended way to express reverse operations for fixed-width vectors is still
+to use a shufflevector, as that may allow for more optimization opportunities.
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic must be a vector.
+
 Matrix Intrinsics
 -----------------
 

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 52bb7d99f9e5..e09dcaec46b3 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -540,6 +540,11 @@ enum NodeType {
   /// vector, but not the other way around.
   EXTRACT_SUBVECTOR,
 
+  /// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR,
+  /// whose elements are shuffled using the following algorithm:
+  ///   RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i]
+  VECTOR_REVERSE,
+
   /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
   /// VEC1/VEC2.  A VECTOR_SHUFFLE node also contains an array of constant int
   /// values that indicate which value (or undef) each result element will

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 409e3070165c..99e18d001100 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1635,6 +1635,12 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
                                                   ImmArg<ArgIndex<1>>,
                                                   ImmArg<ArgIndex<2>>]>;
 
+//===------------ Intrinsics to perform common vector shuffles ------------===//
+
+def int_experimental_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                   [LLVMMatchType<0>],
+                                   [IntrNoMem]>;
+
 //===---------- Intrinsics to query properties of scalable vectors --------===//
 def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
 

diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index b59cdbfbcddc..b612de31beb8 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -254,6 +254,9 @@ def SDTFPVecReduce : SDTypeProfile<1, 1, [  // FP vector reduction
   SDTCisFP<0>, SDTCisVec<1>
 ]>;
 
+def SDTVecReverse : SDTypeProfile<1, 1, [  // vector reverse
+  SDTCisVec<0>, SDTCisSameAs<0,1>
+]>;
 
 def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract
   SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
@@ -651,6 +654,7 @@ def ist        : SDNode<"ISD::STORE"      , SDTIStore,
                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
+def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>;
 def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
 def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
 def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index c40e5c36cdc7..1faf0092e874 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5373,6 +5373,12 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
       return Op0;
     break;
   }
+  case Intrinsic::experimental_vector_reverse:
+    // experimental.vector.reverse(experimental.vector.reverse(x)) -> x
+    if (match(Op0,
+              m_Intrinsic<Intrinsic::experimental_vector_reverse>(m_Value(X))))
+      return X;
+    break;
   default:
     break;
   }

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index c3250e8f43b2..6a612a48e6da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -96,6 +96,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
 
   case ISD::EXTRACT_SUBVECTOR:
                          Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::VECTOR_REVERSE:
+                         Res = PromoteIntRes_VECTOR_REVERSE(N); break;
   case ISD::VECTOR_SHUFFLE:
                          Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
   case ISD::INSERT_VECTOR_ELT:
@@ -4662,6 +4664,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
   return DAG.getBuildVector(NOutVT, dl, Ops);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
+  SDLoc dl(N);
+
+  SDValue V0 = GetPromotedInteger(N->getOperand(0));
+  EVT OutVT = V0.getValueType();
+
+  return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
+}
 
 SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
   ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c77a48bb58fa..8e52ba8e46f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -298,6 +298,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
   SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
   SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
   SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
   SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
@@ -834,6 +835,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
                                   SDValue &Hi);
   void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 57cb364f1939..dc0614a3d938 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -930,6 +930,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::SETCC:
     SplitVecRes_SETCC(N, Lo, Hi);
     break;
+  case ISD::VECTOR_REVERSE:
+    SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
+    break;
   case ISD::VECTOR_SHUFFLE:
     SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
     break;
@@ -5492,3 +5495,13 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
     Ops[Idx] = FillVal;
   return DAG.getBuildVector(NVT, dl, Ops);
 }
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  SDValue InLo, InHi;
+  GetSplitVector(N->getOperand(0), InLo, InHi);
+  SDLoc DL(N);
+
+  Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
+  Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
+}

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 33505f4f6a59..387b5e6519e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7025,6 +7025,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
     return;
   }
+  case Intrinsic::experimental_vector_reverse:
+    visitVectorReverse(I);
+    return;
   }
 }
 
@@ -10836,6 +10839,29 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   }
 }
 
+void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+  SDLoc DL = getCurSDLoc();
+  SDValue V = getValue(I.getOperand(0));
+  assert(VT == V.getValueType() && "Malformed vector.reverse!");
+
+  if (VT.isScalableVector()) {
+    setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
+    return;
+  }
+
+  // Use VECTOR_SHUFFLE for the fixed-length vector
+  // to maintain existing behavior.
+  SmallVector<int, 8> Mask;
+  unsigned NumElts = VT.getVectorMinNumElements();
+  for (unsigned i = 0; i != NumElts; ++i)
+    Mask.push_back(NumElts - 1 - i);
+
+  setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
+}
+
 void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8f6e98c40161..d65e43ddbc1e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -773,6 +773,7 @@ class SelectionDAGBuilder {
   void visitGCResult(const GCResultInst &I);
 
   void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+  void visitVectorReverse(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 62352054bf7b..4cbf8a83e8c7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -289,6 +289,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::SCALAR_TO_VECTOR:           return "scalar_to_vector";
   case ISD::VECTOR_SHUFFLE:             return "vector_shuffle";
   case ISD::SPLAT_VECTOR:               return "splat_vector";
+  case ISD::VECTOR_REVERSE:             return "vector_reverse";
   case ISD::CARRY_FALSE:                return "carry_false";
   case ISD::ADDC:                       return "addc";
   case ISD::ADDE:                       return "adde";

diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 9801036653f7..ca4705cc732a 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3894,7 +3894,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
       return false;
 
     // Vectors (of > 1 lane) in big endian need tricky handling.
-    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
+    if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
         !Subtarget->isLittleEndian())
       return false;
 

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dd91d31422c6..e866fc527a35 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1853,7 +1853,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::CLASTB_N)
     MAKE_CASE(AArch64ISD::LASTA)
     MAKE_CASE(AArch64ISD::LASTB)
-    MAKE_CASE(AArch64ISD::REV)
     MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
     MAKE_CASE(AArch64ISD::TBL)
     MAKE_CASE(AArch64ISD::FADD_PRED)
@@ -3594,7 +3593,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_rev:
-    return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
+    return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_tbl:
     return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 50c3bed74a40..c9c7b6fbe8fd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -292,7 +292,6 @@ enum NodeType : unsigned {
   CLASTB_N,
   LASTA,
   LASTB,
-  REV,
   TBL,
 
   // Floating-point reductions.

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e09b8401c0e0..e562b1efa10f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -249,9 +249,6 @@ def AArch64clasta_n   : SDNode<"AArch64ISD::CLASTA_N",   SDT_AArch64ReduceWithIn
 def AArch64clastb_n   : SDNode<"AArch64ISD::CLASTB_N",   SDT_AArch64ReduceWithInit>;
 def AArch64fadda_p    : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
 
-def SDT_AArch64Rev   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
-def AArch64rev       : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
-
 def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
 def AArch64ptest     : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
 
@@ -587,8 +584,8 @@ let Predicates = [HasSVE] in {
   defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
   defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
 
-  defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
-  defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;
+  defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>;
+  defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>;
 
   defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
   defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;

diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
new file mode 100644
index 000000000000..7687573acfe3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs  < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG  %s
+; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; VECTOR_REVERSE
+;
+
+define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
+; CHECK-LABEL: .LCPI0_0:
+; CHECK:        .byte   15                      // 0xf
+; CHECK-NEXT:   .byte   14                      // 0xe
+; CHECK-NEXT:   .byte   13                      // 0xd
+; CHECK-NEXT:   .byte   12                      // 0xc
+; CHECK-NEXT:   .byte   11                      // 0xb
+; CHECK-NEXT:   .byte   10                      // 0xa
+; CHECK-NEXT:   .byte   9                       // 0x9
+; CHECK-NEXT:   .byte   8                       // 0x8
+; CHECK-NEXT:   .byte   7                       // 0x7
+; CHECK-NEXT:   .byte   6                       // 0x6
+; CHECK-NEXT:   .byte   5                       // 0x5
+; CHECK-NEXT:   .byte   4                       // 0x4
+; CHECK-NEXT:   .byte   3                       // 0x3
+; CHECK-NEXT:   .byte   2                       // 0x2
+; CHECK-NEXT:   .byte   1                       // 0x1
+; CHECK-NEXT:   .byte   0                       // 0x0
+; CHECK-LABEL: reverse_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT:    ret
+
+  %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: .LCPI1_0:
+; CHECK:        .byte   14                      // 0xe
+; CHECK-NEXT:   .byte   15                      // 0xf
+; CHECK-NEXT:   .byte   12                      // 0xc
+; CHECK-NEXT:   .byte   13                      // 0xd
+; CHECK-NEXT:   .byte   10                      // 0xa
+; CHECK-NEXT:   .byte   11                      // 0xb
+; CHECK-NEXT:   .byte   8                       // 0x8
+; CHECK-NEXT:   .byte   9                       // 0x9
+; CHECK-NEXT:   .byte   6                       // 0x6
+; CHECK-NEXT:   .byte   7                       // 0x7
+; CHECK-NEXT:   .byte   4                       // 0x4
+; CHECK-NEXT:   .byte   5                       // 0x5
+; CHECK-NEXT:   .byte   2                       // 0x2
+; CHECK-NEXT:   .byte   3                       // 0x3
+; CHECK-NEXT:   .byte   0                       // 0x0
+; CHECK-NEXT:   .byte   1                       // 0x1
+; CHECK-LABEL: reverse_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT:    ret
+
+  %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+
+  %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
+; CHECK-LABEL: reverse_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+
+  %res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
+  ret <2 x i64> %res
+}
+
+define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
+; CHECK-LABEL: .LCPI4_0:
+; CHECK:        .byte   14                      // 0xe
+; CHECK-NEXT:   .byte   15                      // 0xf
+; CHECK-NEXT:   .byte   12                      // 0xc
+; CHECK-NEXT:   .byte   13                      // 0xd
+; CHECK-NEXT:   .byte   10                      // 0xa
+; CHECK-NEXT:   .byte   11                      // 0xb
+; CHECK-NEXT:   .byte   8                       // 0x8
+; CHECK-NEXT:   .byte   9                       // 0x9
+; CHECK-NEXT:   .byte   6                       // 0x6
+; CHECK-NEXT:   .byte   7                       // 0x7
+; CHECK-NEXT:   .byte   4                       // 0x4
+; CHECK-NEXT:   .byte   5                       // 0x5
+; CHECK-NEXT:   .byte   2                       // 0x2
+; CHECK-NEXT:   .byte   3                       // 0x3
+; CHECK-NEXT:   .byte   0                       // 0x0
+; CHECK-NEXT:   .byte   1                       // 0x1
+; CHECK-LABEL: reverse_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT:    ret
+
+  %res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a)
+  ret <8 x half> %res
+}
+
+define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
+; CHECK-LABEL: reverse_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+
+  %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
+  ret <4 x float> %res
+}
+
+define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: reverse_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+
+  %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
+  ret <2 x double> %res
+}
+
+; Verify promote type legalisation works as expected.
+define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
+; CHECK-LABEL: reverse_v2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev64 v0.2s, v0.2s
+; CHECK-NEXT:    ret
+
+  %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
+  ret <2 x i8> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v8i32:
+; CHECK-SELDAG:       // %bb.0:
+; CHECK-SELDAG-NEXT:    rev64 v1.4s, v1.4s
+; CHECK-SELDAG-NEXT:    rev64 v2.4s, v0.4s
+; CHECK-SELDAG-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
+; CHECK-SELDAG-NEXT:    ext v1.16b, v2.16b, v2.16b, #8
+; CHECK-SELDAG-NEXT:    ret
+; CHECK-FASTISEL:       // %bb.0:
+; CHECK-FASTISEL-NEXT:    sub    sp, sp, #16
+; CHECK-FASTISEL-NEXT:    str    q1, [sp]
+; CHECK-FASTISEL-NEXT:    mov    v1.16b, v0.16b
+; CHECK-FASTISEL-NEXT:    ldr    q0, [sp]
+; CHECK-FASTISEL-NEXT:    rev64    v0.4s, v0.4s
+; CHECK-FASTISEL-NEXT:    ext    v0.16b, v0.16b, v0.16b, #8
+; CHECK-FASTISEL-NEXT:    rev64    v1.4s, v1.4s
+; CHECK-FASTISEL-NEXT:    ext    v1.16b, v1.16b, v1.16b, #8
+; CHECK-FASTISEL-NEXT:    add    sp, sp, #16
+; CHECK-FASTISEL-NEXT:    ret
+
+  %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
+  ret <8 x i32> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
+; CHECK-LABEL: reverse_v16f32:
+; CHECK-SELDAG:       // %bb.0:
+; CHECK-SELDAG-NEXT:    rev64 v3.4s, v3.4s
+; CHECK-SELDAG-NEXT:    rev64 v2.4s, v2.4s
+; CHECK-SELDAG-NEXT:    rev64 v4.4s, v1.4s
+; CHECK-SELDAG-NEXT:    rev64 v5.4s, v0.4s
+; CHECK-SELDAG-NEXT:    ext v0.16b, v3.16b, v3.16b, #8
+; CHECK-SELDAG-NEXT:    ext v1.16b, v2.16b, v2.16b, #8
+; CHECK-SELDAG-NEXT:    ext v2.16b, v4.16b, v4.16b, #8
+; CHECK-SELDAG-NEXT:    ext v3.16b, v5.16b, v5.16b, #8
+; CHECK-SELDAG-NEXT:    ret
+; CHECK-FASTISEL:       // %bb.0:
+; CHECK-FASTISEL-NEXT:    sub    sp, sp, #32
+; CHECK-FASTISEL-NEXT:    str    q3, [sp, #16]
+; CHECK-FASTISEL-NEXT:    str    q2, [sp]
+; CHECK-FASTISEL-NEXT:    mov    v2.16b, v1.16b
+; CHECK-FASTISEL-NEXT:    ldr    q1, [sp]
+; CHECK-FASTISEL-NEXT:    mov    v3.16b, v0.16b
+; CHECK-FASTISEL-NEXT:    ldr    q0, [sp, #16]
+; CHECK-FASTISEL-NEXT:    rev64    v0.4s, v0.4s
+; CHECK-FASTISEL-NEXT:    ext    v0.16b, v0.16b, v0.16b, #8
+; CHECK-FASTISEL-NEXT:    rev64    v1.4s, v1.4s
+; CHECK-FASTISEL-NEXT:    ext    v1.16b, v1.16b, v1.16b, #8
+; CHECK-FASTISEL-NEXT:    rev64    v2.4s, v2.4s
+; CHECK-FASTISEL-NEXT:    ext    v2.16b, v2.16b, v2.16b, #8
+; CHECK-FASTISEL-NEXT:    rev64    v3.4s, v3.4s
+; CHECK-FASTISEL-NEXT:    ext    v3.16b, v3.16b, v3.16b, #8
+; CHECK-FASTISEL-NEXT:    add    sp, sp, #32
+; CHECK-FASTISEL-NEXT:    ret
+
+  %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
+  ret <16 x float> %res
+}
+
+
+declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
+declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
+declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
+declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
+declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
+declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
+declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
+declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
+declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
+
+attributes #0 = { nounwind "target-features"="+neon" }

diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
new file mode 100644
index 000000000000..ef67140c845b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs  < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG  %s
+; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; VECTOR_REVERSE - PPR
+;
+
+define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
+  ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
+  ret <vscale x 16 x i1> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv32i1:
+; CHECK-SELDAG:       // %bb.0:
+; CHECK-SELDAG-NEXT:    rev p2.b, p1.b
+; CHECK-SELDAG-NEXT:    rev p1.b, p0.b
+; CHECK-SELDAG-NEXT:    mov p0.b, p2.b
+; CHECK-SELDAG-NEXT:    ret
+; CHECK-FASTISEL:       // %bb.0:
+; CHECK-FASTISEL-NEXT:    str    x29, [sp, #-16]
+; CHECK-FASTISEL-NEXT:    addvl    sp, sp, #-1
+; CHECK-FASTISEL-NEXT:    str    p1, [sp, #7, mul vl]
+; CHECK-FASTISEL-NEXT:    mov    p1.b, p0.b
+; CHECK-FASTISEL-NEXT:    ldr    p0, [sp, #7, mul vl]
+; CHECK-FASTISEL-NEXT:    rev    p0.b, p0.b
+; CHECK-FASTISEL-NEXT:    rev    p1.b, p1.b
+; CHECK-FASTISEL-NEXT:    addvl    sp, sp, #1
+; CHECK-FASTISEL-NEXT:    ldr    x29, [sp], #16
+; CHECK-FASTISEL-NEXT:    ret
+
+  %res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
+  ret <vscale x 32 x i1> %res
+}
+
+;
+; VECTOR_REVERSE - ZPR
+;
+
+define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: reverse_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.b, z0.b
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: reverse_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.h, z0.h
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: reverse_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.s, z0.s
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @reverse_nxv2i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: reverse_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.d, z0.d
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> %a)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: reverse_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.h, z0.h
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: reverse_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.s, z0.s
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @reverse_nxv2f64(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: reverse_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.d, z0.d
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %res
+}
+
+; Verify promote type legalisation works as expected.
+define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) #0 {
+; CHECK-LABEL: reverse_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev z0.d, z0.d
+; CHECK-NEXT:    ret
+
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8> %a)
+  ret <vscale x 2 x i8> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
+; CHECK-LABEL: reverse_nxv8i32:
+; CHECK-SELDAG:       // %bb.0:
+; CHECK-SELDAG-NEXT:    rev z2.s, z1.s
+; CHECK-SELDAG-NEXT:    rev z1.s, z0.s
+; CHECK-SELDAG-NEXT:    mov z0.d, z2.d
+; CHECK-SELDAG-NEXT:    ret
+; CHECK-FASTISEL:       // %bb.0:
+; CHECK-FASTISEL-NEXT:    str    x29, [sp, #-16]
+; CHECK-FASTISEL-NEXT:    addvl    sp, sp, #-1
+; CHECK-FASTISEL-NEXT:    str    z1, [sp]
+; CHECK-FASTISEL-NEXT:    mov    z1.d, z0.d
+; CHECK-FASTISEL-NEXT:    ldr    z0, [sp]
+; CHECK-FASTISEL-NEXT:    rev    z0.s, z0.s
+; CHECK-FASTISEL-NEXT:    rev    z1.s, z1.s
+; CHECK-FASTISEL-NEXT:    addvl    sp, sp, #1
+; CHECK-FASTISEL-NEXT:    ldr    x29, [sp], #16
+; CHECK-FASTISEL-NEXT:    ret
+
+  %res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
+  ret <vscale x 8 x i32> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
+; CHECK-LABEL: reverse_nxv16f32:
+; CHECK-SELDAG:       // %bb.0:
+; CHECK-SELDAG-NEXT:    rev z5.s, z3.s
+; CHECK-SELDAG-NEXT:    rev z4.s, z2.s
+; CHECK-SELDAG-NEXT:    rev z2.s, z1.s
+; CHECK-SELDAG-NEXT:    rev z3.s, z0.s
+; CHECK-SELDAG-NEXT:    mov z0.d, z5.d
+; CHECK-SELDAG-NEXT:    mov z1.d, z4.d
+; CHECK-SELDAG-NEXT:    ret
+; CHECK-FASTISEL:       // %bb.0:
+; CHECK-FASTISEL-NEXT:    str    x29, [sp, #-16]
+; CHECK-FASTISEL-NEXT:    addvl    sp, sp, #-2
+; CHECK-FASTISEL-NEXT:    str    z3, [sp, #1, mul vl]
+; CHECK-FASTISEL-NEXT:    str    z2, [sp]
+; CHECK-FASTISEL-NEXT:    mov    z2.d, z1.d
+; CHECK-FASTISEL-NEXT:    ldr    z1, [sp]
+; CHECK-FASTISEL-NEXT:    mov    z3.d, z0.d
+; CHECK-FASTISEL-NEXT:    ldr    z0, [sp, #1, mul vl]
+; CHECK-FASTISEL-NEXT:    rev    z0.s, z0.s
+; CHECK-FASTISEL-NEXT:    rev    z1.s, z1.s
+; CHECK-FASTISEL-NEXT:    rev    z2.s, z2.s
+; CHECK-FASTISEL-NEXT:    rev    z3.s, z3.s
+; CHECK-FASTISEL-NEXT:    addvl    sp, sp, #2
+; CHECK-FASTISEL-NEXT:    ldr    x29, [sp], #16
+; CHECK-FASTISEL-NEXT:    ret
+
+  %res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
+  ret <vscale x 16 x float> %res
+}
+
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1>)
+declare <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8>)
+declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float>)
+declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
+
+
+attributes #0 = { nounwind "target-features"="+sve" }

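The two splitvec tests above (`reverse_nxv8i32`, `reverse_nxv16f32`) check that a vector too wide for one Z register is reversed by reversing each register-sized part and then swapping the parts. A small Python model of that strategy (illustrative only, not LLVM code):

```python
def reverse_split(v):
    # Split-legalisation sketch: reverse each register-sized half of a
    # "too wide" vector, then swap the halves -- mirroring the two REV
    # instructions plus the MOV checked in reverse_nxv8i32 above.
    mid = len(v) // 2
    lo, hi = v[:mid], v[mid:]
    return hi[::-1] + lo[::-1]

print(reverse_split([0, 1, 2, 3, 4, 5, 6, 7]))  # -> [7, 6, 5, 4, 3, 2, 1, 0]
```

The same idea applies recursively for the four-register `nxv16f32` case: reverse each quarter, then reverse the order of the quarters.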
diff  --git a/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll
new file mode 100644
index 000000000000..b8c85fc5d2ad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs  < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+
+target triple = "x86_64-unknown-unknown"
+
+;
+; VECTOR_REVERSE
+;
+
+define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
+; CHECK-LABEL: reverse_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor      %xmm1, %xmm1
+; CHECK-NEXT:    movdqa    %xmm0, %xmm2
+; CHECK-NEXT:    punpcklbw  %xmm1, %xmm2
+; CHECK-NEXT:    pshufd     $78, %xmm2, %xmm2
+; CHECK-NEXT:    pshuflw    $27, %xmm2, %xmm2
+; CHECK-NEXT:    pshufhw    $27, %xmm2, %xmm2
+; CHECK-NEXT:    punpckhbw  %xmm1, %xmm0
+; CHECK-NEXT:    pshufd     $78, %xmm0, %xmm0
+; CHECK-NEXT:    pshuflw    $27, %xmm0, %xmm0
+; CHECK-NEXT:    pshufhw    $27, %xmm0, %xmm0
+; CHECK-NEXT:    packuswb   %xmm2, %xmm0
+; CHECK-NEXT:    retq
+
+  %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: reverse_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshufd     $78, %xmm0, %xmm0
+; CHECK-NEXT:    pshuflw    $27, %xmm0, %xmm0
+; CHECK-NEXT:    pshufhw    $27, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshufd    $27, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
+; CHECK-LABEL: reverse_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshufd    $78, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
+  ret <2 x i64> %res
+}
+
+define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
+; CHECK-LABEL: reverse_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shufps    $27, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
+  ret <4 x float> %res
+}
+
+define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: reverse_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    shufps    $78, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
+  ret <2 x double> %res
+}
+
+; Verify promote type legalisation works as expected.
+define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
+; CHECK-LABEL: reverse_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa   %xmm0, %xmm1
+; CHECK-NEXT:    psrlw    $8, %xmm1
+; CHECK-NEXT:    psllw    $8, %xmm0
+; CHECK-NEXT:    por      %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
+  ret <2 x i8> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshufd    $27, %xmm1, %xmm2
+; CHECK-NEXT:    pshufd    $27, %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
+  ret <8 x i32> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
+; CHECK-LABEL: reverse_v16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movaps    %xmm1, %xmm4
+; CHECK-NEXT:    movaps    %xmm0, %xmm5
+; CHECK-NEXT:    shufps    $27, %xmm3, %xmm3
+; CHECK-NEXT:    shufps    $27, %xmm2, %xmm2
+; CHECK-NEXT:    shufps    $27, %xmm1, %xmm4
+; CHECK-NEXT:    shufps    $27, %xmm0, %xmm5
+; CHECK-NEXT:    movaps    %xmm3, %xmm0
+; CHECK-NEXT:    movaps    %xmm2, %xmm1
+; CHECK-NEXT:    movaps    %xmm4, %xmm2
+; CHECK-NEXT:    movaps    %xmm5, %xmm3
+; CHECK-NEXT:    retq
+
+  %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
+  ret <16 x float> %res
+}
+
+
+declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
+declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
+declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
+declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
+declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
+declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
+declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
+declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
+declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
+
+attributes #0 = { nounwind }

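The x86 checks above lean on the shuffle immediates `$27` and `$78`: each 2-bit field of the immediate selects a source 32-bit lane. A Python model of that selection (an illustrative sketch of the instruction semantics, not LLVM code) shows why `$27` is a full four-lane reverse and `$78` swaps the 64-bit halves:

```python
def pshufd(src, imm8):
    # Model of x86 PSHUFD-style lane selection: bits [2i+1:2i] of the
    # immediate pick the source 32-bit lane for destination lane i.
    return [src[(imm8 >> (2 * i)) & 0b11] for i in range(4)]

lanes = ["A", "B", "C", "D"]
print(pshufd(lanes, 0x1B))  # $27 = 0b00_01_10_11: full four-lane reverse
print(pshufd(lanes, 0x4E))  # $78 = 0b01_00_11_10: swap the 64-bit halves
```

The half swap from `$78` is exactly what `reverse_v2i64` and `reverse_v2f64` need, since each 64-bit element spans two 32-bit lanes.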
diff  --git a/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll b/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll
new file mode 100644
index 000000000000..076736ec15ad
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll
@@ -0,0 +1,17 @@
+; RUN: opt  -instsimplify -S < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Test back to back reverse shuffles are eliminated.
+define <vscale x 4 x i32> @shuffle_b2b_reverse(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @shuffle_b2b_reverse(
+; CHECK: ret <vscale x 4 x i32> %a
+  %rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %rev.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %rev)
+  ret <vscale x 4 x i32> %rev.rev
+}
+
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)

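The InstSimplify test relies on reverse being an involution: applying it twice returns the original vector, lane for lane. Modelled in Python (a behavioural sketch, not the InstSimplify implementation):

```python
def vector_reverse(v):
    # Behavioural model of llvm.experimental.vector.reverse: same lanes,
    # opposite order.
    return v[::-1]

v = [1, 2, 3, 4]
# Back-to-back reverses cancel, so InstSimplify can fold the pair to %a.
print(vector_reverse(vector_reverse(v)))  # -> [1, 2, 3, 4]
```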
More information about the llvm-commits mailing list