[llvm] 2d728bb - [CodeGen][SelectionDAG]Add new intrinsic experimental.vector.reverse
Caroline Concatto via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 15 05:41:41 PST 2021
Author: Caroline Concatto
Date: 2021-02-15T13:39:43Z
New Revision: 2d728bbff5c688284b8b8306ecfd3000b0ab8bb1
URL: https://github.com/llvm/llvm-project/commit/2d728bbff5c688284b8b8306ecfd3000b0ab8bb1
DIFF: https://github.com/llvm/llvm-project/commit/2d728bbff5c688284b8b8306ecfd3000b0ab8bb1.diff
LOG: [CodeGen][SelectionDAG]Add new intrinsic experimental.vector.reverse
This patch adds a new intrinsic experimental.vector.reverse that takes a single
vector and returns a vector of matching type but with the original lane order
reversed. For example:
```
vector.reverse(<A,B,C,D>) ==> <D,C,B,A>
```
The new intrinsic supports fixed and scalable vector types.
The fixed-width vector relies on shufflevector to maintain existing behaviour.
Scalable vector uses the new ISD node - VECTOR_REVERSE.
This new intrinsic is one of the named shufflevector intrinsics proposed on the
mailing-list in the RFC at [1].
Patch by Paul Walker (@paulwalker-arm).
[1] https://lists.llvm.org/pipermail/llvm-dev/2020-November/146864.html
Differential Revision: https://reviews.llvm.org/D94883
Added:
llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll
llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/Target/AArch64/AArch64FastISel.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7918e5fd6e4f..133edf8ccf5e 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16233,6 +16233,33 @@ runtime, then the result vector is undefined. The ``idx`` parameter must be a
vector index constant type (for most targets this will be an integer pointer
type).
+'``llvm.experimental.vector.reverse``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
+ declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vector.reverse.*``' intrinsics reverse a vector.
+The intrinsic takes a single vector and returns a vector of matching type but
+with the original lane order reversed. These intrinsics work for both fixed
+and scalable vectors. While this intrinsic is marked as experimental the
+recommended way to express reverse operations for fixed-width vectors is still
+to use a shufflevector, as that may allow for more optimization opportunities.
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic must be a vector.
+
Matrix Intrinsics
-----------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 52bb7d99f9e5..e09dcaec46b3 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -540,6 +540,11 @@ enum NodeType {
/// vector, but not the other way around.
EXTRACT_SUBVECTOR,
+ /// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR,
+ /// whose elements are shuffled using the following algorithm:
+ /// RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i]
+ VECTOR_REVERSE,
+
/// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
/// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
/// values that indicate which value (or undef) each result element will
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 409e3070165c..99e18d001100 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1635,6 +1635,12 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
+//===------------ Intrinsics to perform common vector shuffles ------------===//
+
+def int_experimental_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>],
+ [IntrNoMem]>;
+
//===---------- Intrinsics to query properties of scalable vectors --------===//
def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index b59cdbfbcddc..b612de31beb8 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -254,6 +254,9 @@ def SDTFPVecReduce : SDTypeProfile<1, 1, [ // FP vector reduction
SDTCisFP<0>, SDTCisVec<1>
]>;
+def SDTVecReverse : SDTypeProfile<1, 1, [ // vector reverse
+ SDTCisVec<0>, SDTCisSameAs<0,1>
+]>;
def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract
SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
@@ -651,6 +654,7 @@ def ist : SDNode<"ISD::STORE" , SDTIStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
+def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>;
def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index c40e5c36cdc7..1faf0092e874 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5373,6 +5373,12 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
return Op0;
break;
}
+ case Intrinsic::experimental_vector_reverse:
+ // experimental.vector.reverse(experimental.vector.reverse(x)) -> x
+ if (match(Op0,
+ m_Intrinsic<Intrinsic::experimental_vector_reverse>(m_Value(X))))
+ return X;
+ break;
default:
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index c3250e8f43b2..6a612a48e6da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -96,6 +96,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR:
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_REVERSE:
+ Res = PromoteIntRes_VECTOR_REVERSE(N); break;
case ISD::VECTOR_SHUFFLE:
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
case ISD::INSERT_VECTOR_ELT:
@@ -4662,6 +4664,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBuildVector(NOutVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
+}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c77a48bb58fa..8e52ba8e46f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -298,6 +298,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
@@ -834,6 +835,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 57cb364f1939..dc0614a3d938 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -930,6 +930,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
+ case ISD::VECTOR_REVERSE:
+ SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
+ break;
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
@@ -5492,3 +5495,13 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
Ops[Idx] = FillVal;
return DAG.getBuildVector(NVT, dl, Ops);
}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue InLo, InHi;
+ GetSplitVector(N->getOperand(0), InLo, InHi);
+ SDLoc DL(N);
+
+ Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
+ Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 33505f4f6a59..387b5e6519e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7025,6 +7025,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
return;
}
+ case Intrinsic::experimental_vector_reverse:
+ visitVectorReverse(I);
+ return;
}
}
@@ -10836,6 +10839,29 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
}
+void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ SDLoc DL = getCurSDLoc();
+ SDValue V = getValue(I.getOperand(0));
+ assert(VT == V.getValueType() && "Malformed vector.reverse!");
+
+ if (VT.isScalableVector()) {
+ setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
+ return;
+ }
+
+ // Use VECTOR_SHUFFLE for the fixed-length vector
+ // to maintain existing behavior.
+ SmallVector<int, 8> Mask;
+ unsigned NumElts = VT.getVectorMinNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(NumElts - 1 - i);
+
+ setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
+}
+
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8f6e98c40161..d65e43ddbc1e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -773,6 +773,7 @@ class SelectionDAGBuilder {
void visitGCResult(const GCResultInst &I);
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+ void visitVectorReverse(const CallInst &I);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 62352054bf7b..4cbf8a83e8c7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -289,6 +289,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
case ISD::SPLAT_VECTOR: return "splat_vector";
+ case ISD::VECTOR_REVERSE: return "vector_reverse";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 9801036653f7..ca4705cc732a 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3894,7 +3894,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
return false;
// Vectors (of > 1 lane) in big endian need tricky handling.
- if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
+ if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
!Subtarget->isLittleEndian())
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dd91d31422c6..e866fc527a35 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1853,7 +1853,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CLASTB_N)
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
- MAKE_CASE(AArch64ISD::REV)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
@@ -3594,7 +3593,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_rev:
- return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
+ return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 50c3bed74a40..c9c7b6fbe8fd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -292,7 +292,6 @@ enum NodeType : unsigned {
CLASTB_N,
LASTA,
LASTB,
- REV,
TBL,
// Floating-point reductions.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e09b8401c0e0..e562b1efa10f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -249,9 +249,6 @@ def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithIn
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
-def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
-def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
-
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
@@ -587,8 +584,8 @@ let Predicates = [HasSVE] in {
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
- defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
- defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;
+ defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>;
+ defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>;
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
new file mode 100644
index 000000000000..7687573acfe3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s
+; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; VECTOR_REVERSE
+;
+
+define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
+; CHECK-LABEL: .LCPI0_0:
+; CHECK: .byte 15 // 0xf
+; CHECK-NEXT: .byte 14 // 0xe
+; CHECK-NEXT: .byte 13 // 0xd
+; CHECK-NEXT: .byte 12 // 0xc
+; CHECK-NEXT: .byte 11 // 0xb
+; CHECK-NEXT: .byte 10 // 0xa
+; CHECK-NEXT: .byte 9 // 0x9
+; CHECK-NEXT: .byte 8 // 0x8
+; CHECK-NEXT: .byte 7 // 0x7
+; CHECK-NEXT: .byte 6 // 0x6
+; CHECK-NEXT: .byte 5 // 0x5
+; CHECK-NEXT: .byte 4 // 0x4
+; CHECK-NEXT: .byte 3 // 0x3
+; CHECK-NEXT: .byte 2 // 0x2
+; CHECK-NEXT: .byte 1 // 0x1
+; CHECK-NEXT: .byte 0 // 0x0
+; CHECK-LABEL: reverse_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI0_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: ret
+
+ %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: .LCPI1_0:
+; CHECK: .byte 14 // 0xe
+; CHECK-NEXT: .byte 15 // 0xf
+; CHECK-NEXT: .byte 12 // 0xc
+; CHECK-NEXT: .byte 13 // 0xd
+; CHECK-NEXT: .byte 10 // 0xa
+; CHECK-NEXT: .byte 11 // 0xb
+; CHECK-NEXT: .byte 8 // 0x8
+; CHECK-NEXT: .byte 9 // 0x9
+; CHECK-NEXT: .byte 6 // 0x6
+; CHECK-NEXT: .byte 7 // 0x7
+; CHECK-NEXT: .byte 4 // 0x4
+; CHECK-NEXT: .byte 5 // 0x5
+; CHECK-NEXT: .byte 2 // 0x2
+; CHECK-NEXT: .byte 3 // 0x3
+; CHECK-NEXT: .byte 0 // 0x0
+; CHECK-NEXT: .byte 1 // 0x1
+; CHECK-LABEL: reverse_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI1_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: ret
+
+ %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
+
+ %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
+; CHECK-LABEL: reverse_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
+
+ %res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
+ ret <2 x i64> %res
+}
+
+define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
+; CHECK-LABEL: .LCPI4_0:
+; CHECK: .byte 14 // 0xe
+; CHECK-NEXT: .byte 15 // 0xf
+; CHECK-NEXT: .byte 12 // 0xc
+; CHECK-NEXT: .byte 13 // 0xd
+; CHECK-NEXT: .byte 10 // 0xa
+; CHECK-NEXT: .byte 11 // 0xb
+; CHECK-NEXT: .byte 8 // 0x8
+; CHECK-NEXT: .byte 9 // 0x9
+; CHECK-NEXT: .byte 6 // 0x6
+; CHECK-NEXT: .byte 7 // 0x7
+; CHECK-NEXT: .byte 4 // 0x4
+; CHECK-NEXT: .byte 5 // 0x5
+; CHECK-NEXT: .byte 2 // 0x2
+; CHECK-NEXT: .byte 3 // 0x3
+; CHECK-NEXT: .byte 0 // 0x0
+; CHECK-NEXT: .byte 1 // 0x1
+; CHECK-LABEL: reverse_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT: ret
+
+ %res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a)
+ ret <8 x half> %res
+}
+
+define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
+; CHECK-LABEL: reverse_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.4s, v0.4s
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
+
+ %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
+ ret <4 x float> %res
+}
+
+define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: reverse_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
+
+ %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
+ ret <2 x double> %res
+}
+
+; Verify promote type legalisation works as expected.
+define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
+; CHECK-LABEL: reverse_v2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev64 v0.2s, v0.2s
+; CHECK-NEXT: ret
+
+ %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
+ ret <2 x i8> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v8i32:
+; CHECK-SELDAG: // %bb.0:
+; CHECK-SELDAG-NEXT: rev64 v1.4s, v1.4s
+; CHECK-SELDAG-NEXT: rev64 v2.4s, v0.4s
+; CHECK-SELDAG-NEXT: ext v0.16b, v1.16b, v1.16b, #8
+; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8
+; CHECK-SELDAG-NEXT: ret
+; CHECK-FASTISEL: // %bb.0:
+; CHECK-FASTISEL-NEXT: sub sp, sp, #16
+; CHECK-FASTISEL-NEXT: str q1, [sp]
+; CHECK-FASTISEL-NEXT: mov v1.16b, v0.16b
+; CHECK-FASTISEL-NEXT: ldr q0, [sp]
+; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s
+; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s
+; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-FASTISEL-NEXT: add sp, sp, #16
+; CHECK-FASTISEL-NEXT: ret
+
+ %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
+ ret <8 x i32> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
+; CHECK-LABEL: reverse_v16f32:
+; CHECK-SELDAG: // %bb.0:
+; CHECK-SELDAG-NEXT: rev64 v3.4s, v3.4s
+; CHECK-SELDAG-NEXT: rev64 v2.4s, v2.4s
+; CHECK-SELDAG-NEXT: rev64 v4.4s, v1.4s
+; CHECK-SELDAG-NEXT: rev64 v5.4s, v0.4s
+; CHECK-SELDAG-NEXT: ext v0.16b, v3.16b, v3.16b, #8
+; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8
+; CHECK-SELDAG-NEXT: ext v2.16b, v4.16b, v4.16b, #8
+; CHECK-SELDAG-NEXT: ext v3.16b, v5.16b, v5.16b, #8
+; CHECK-SELDAG-NEXT: ret
+; CHECK-FASTISEL: // %bb.0:
+; CHECK-FASTISEL-NEXT: sub sp, sp, #32
+; CHECK-FASTISEL-NEXT: str q3, [sp, #16]
+; CHECK-FASTISEL-NEXT: str q2, [sp]
+; CHECK-FASTISEL-NEXT: mov v2.16b, v1.16b
+; CHECK-FASTISEL-NEXT: ldr q1, [sp]
+; CHECK-FASTISEL-NEXT: mov v3.16b, v0.16b
+; CHECK-FASTISEL-NEXT: ldr q0, [sp, #16]
+; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s
+; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s
+; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-FASTISEL-NEXT: rev64 v2.4s, v2.4s
+; CHECK-FASTISEL-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-FASTISEL-NEXT: rev64 v3.4s, v3.4s
+; CHECK-FASTISEL-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-FASTISEL-NEXT: add sp, sp, #32
+; CHECK-FASTISEL-NEXT: ret
+
+ %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
+ ret <16 x float> %res
+}
+
+
+declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
+declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
+declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
+declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
+declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
+declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
+declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
+declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
+declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
+
+attributes #0 = { nounwind "target-features"="+neon" }
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
new file mode 100644
index 000000000000..ef67140c845b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s
+; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; VECTOR_REVERSE - PPR
+;
+
+define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev p0.d, p0.d
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev p0.s, p0.s
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev p0.h, p0.h
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev p0.b, p0.b
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) #0 {
+; CHECK-LABEL: reverse_nxv32i1:
+; CHECK-SELDAG: // %bb.0:
+; CHECK-SELDAG-NEXT: rev p2.b, p1.b
+; CHECK-SELDAG-NEXT: rev p1.b, p0.b
+; CHECK-SELDAG-NEXT: mov p0.b, p2.b
+; CHECK-SELDAG-NEXT: ret
+; CHECK-FASTISEL: // %bb.0:
+; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
+; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl]
+; CHECK-FASTISEL-NEXT: mov p1.b, p0.b
+; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl]
+; CHECK-FASTISEL-NEXT: rev p0.b, p0.b
+; CHECK-FASTISEL-NEXT: rev p1.b, p1.b
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
+; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
+; CHECK-FASTISEL-NEXT: ret
+
+ %res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
+ ret <vscale x 32 x i1> %res
+}
+
+;
+; VECTOR_REVERSE - ZPR
+;
+
+define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: reverse_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.b, z0.b
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: reverse_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.h, z0.h
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: reverse_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.s, z0.s
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @reverse_nxv2i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: reverse_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.d, z0.d
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: reverse_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.h, z0.h
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: reverse_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.s, z0.s
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @reverse_nxv2f64(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: reverse_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.d, z0.d
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+; Verify promote type legalisation works as expected.
+define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) #0 {
+; CHECK-LABEL: reverse_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rev z0.d, z0.d
+; CHECK-NEXT: ret
+
+ %res = call <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8> %a)
+ ret <vscale x 2 x i8> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
+; CHECK-LABEL: reverse_nxv8i32:
+; CHECK-SELDAG: // %bb.0:
+; CHECK-SELDAG-NEXT: rev z2.s, z1.s
+; CHECK-SELDAG-NEXT: rev z1.s, z0.s
+; CHECK-SELDAG-NEXT: mov z0.d, z2.d
+; CHECK-SELDAG-NEXT: ret
+; CHECK-FASTISEL: // %bb.0:
+; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
+; CHECK-FASTISEL-NEXT: str z1, [sp]
+; CHECK-FASTISEL-NEXT: mov z1.d, z0.d
+; CHECK-FASTISEL-NEXT: ldr z0, [sp]
+; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
+; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
+; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
+; CHECK-FASTISEL-NEXT: ret
+
+ %res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
+ ret <vscale x 8 x i32> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
+; CHECK-LABEL: reverse_nxv16f32:
+; CHECK-SELDAG: // %bb.0:
+; CHECK-SELDAG-NEXT: rev z5.s, z3.s
+; CHECK-SELDAG-NEXT: rev z4.s, z2.s
+; CHECK-SELDAG-NEXT: rev z2.s, z1.s
+; CHECK-SELDAG-NEXT: rev z3.s, z0.s
+; CHECK-SELDAG-NEXT: mov z0.d, z5.d
+; CHECK-SELDAG-NEXT: mov z1.d, z4.d
+; CHECK-SELDAG-NEXT: ret
+; CHECK-FASTISEL: // %bb.0:
+; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2
+; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl]
+; CHECK-FASTISEL-NEXT: str z2, [sp]
+; CHECK-FASTISEL-NEXT: mov z2.d, z1.d
+; CHECK-FASTISEL-NEXT: ldr z1, [sp]
+; CHECK-FASTISEL-NEXT: mov z3.d, z0.d
+; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl]
+; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
+; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
+; CHECK-FASTISEL-NEXT: rev z2.s, z2.s
+; CHECK-FASTISEL-NEXT: rev z3.s, z3.s
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #2
+; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
+; CHECK-FASTISEL-NEXT: ret
+
+ %res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
+ ret <vscale x 16 x float> %res
+}
+
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1>)
+declare <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8>)
+declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float>)
+declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
+
+
+attributes #0 = { nounwind "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll
new file mode 100644
index 000000000000..b8c85fc5d2ad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+
+target triple = "x86_64-unknown-unknown"
+
+;
+; VECTOR_REVERSE
+;
+
+define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
+; CHECK-LABEL: reverse_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: punpcklbw %xmm1, %xmm2
+; CHECK-NEXT: pshufd $78, %xmm2, %xmm2
+; CHECK-NEXT: pshuflw $27, %xmm2, %xmm2
+; CHECK-NEXT: pshufhw $27, %xmm2, %xmm2
+; CHECK-NEXT: punpckhbw %xmm1, %xmm0
+; CHECK-NEXT: pshufd $78, %xmm0, %xmm0
+; CHECK-NEXT: pshuflw $27, %xmm0, %xmm0
+; CHECK-NEXT: pshufhw $27, %xmm0, %xmm0
+; CHECK-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NEXT: retq
+
+ %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: reverse_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pshufd $78, %xmm0, %xmm0
+; CHECK-NEXT: pshuflw $27, %xmm0, %xmm0
+; CHECK-NEXT: pshufhw $27, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pshufd $27, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
+; CHECK-LABEL: reverse_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pshufd $78, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
+ ret <2 x i64> %res
+}
+
+define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
+; CHECK-LABEL: reverse_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: shufps $27, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
+ ret <4 x float> %res
+}
+
+define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: reverse_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: shufps $78, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
+ ret <2 x double> %res
+}
+
+; Verify promote type legalisation works as expected.
+define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
+; CHECK-LABEL: reverse_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $8, %xmm1
+; CHECK-NEXT: psllw $8, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
+ ret <2 x i8> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: reverse_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pshufd $27, %xmm1, %xmm2
+; CHECK-NEXT: pshufd $27, %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
+ ret <8 x i32> %res
+}
+
+; Verify splitvec type legalisation works as expected.
+define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
+; CHECK-LABEL: reverse_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movaps %xmm1, %xmm4
+; CHECK-NEXT: movaps %xmm0, %xmm5
+; CHECK-NEXT: shufps $27, %xmm3, %xmm3
+; CHECK-NEXT: shufps $27, %xmm2, %xmm2
+; CHECK-NEXT: shufps $27, %xmm1, %xmm4
+; CHECK-NEXT: shufps $27, %xmm0, %xmm5
+; CHECK-NEXT: movaps %xmm3, %xmm0
+; CHECK-NEXT: movaps %xmm2, %xmm1
+; CHECK-NEXT: movaps %xmm4, %xmm2
+; CHECK-NEXT: movaps %xmm5, %xmm3
+; CHECK-NEXT: retq
+
+ %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
+ ret <16 x float> %res
+}
+
+
+declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
+declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
+declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
+declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
+declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
+declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
+declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
+declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
+declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll b/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll
new file mode 100644
index 000000000000..076736ec15ad
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll
@@ -0,0 +1,17 @@
+; RUN: opt -instsimplify -S < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Test back to back reverse shuffles are eliminated.
+define <vscale x 4 x i32> @shuffle_b2b_reverse(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @shuffle_b2b_reverse(
+; CHECK: ret <vscale x 4 x i32> %a
+ %rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+ %rev.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %rev)
+ ret <vscale x 4 x i32> %rev.rev
+}
+
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
More information about the llvm-commits
mailing list