[llvm] 969d2d1 - [AArch64][GlobalISel] Add selection support for rev16, rev32, and rev64

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 3 15:30:54 PDT 2020


Author: Jessica Paquette
Date: 2020-06-03T15:30:30-07:00
New Revision: 969d2d1ea9466143e7099040f5f0735cc81963b1

URL: https://github.com/llvm/llvm-project/commit/969d2d1ea9466143e7099040f5f0735cc81963b1
DIFF: https://github.com/llvm/llvm-project/commit/969d2d1ea9466143e7099040f5f0735cc81963b1.diff

LOG: [AArch64][GlobalISel] Add selection support for rev16, rev32, and rev64

This does three things:

1) Adds G_REV16, G_REV32, and G_REV64. These are equivalent to AArch64rev16,
   AArch64rev32, and AArch64rev64 respectively.

2) Adds support for producing G_REV64 in the postlegalizer combiner (see the
   worked mask example after this list). We don't yet legalize any of the
   shuffles that could give us a G_REV32 or G_REV16. Since the function for
   detecting the rev mask is lifted from AArch64ISelLowering, it should work
   for G_REV32 and G_REV16 once we get there.

3) Adds a selection test for a good portion of the patterns imported for the rev
   family. The only ones which are not tested are the ones with bitconvert.
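
For reference, here is a rough standalone sketch of the mask check the
combiner performs. The helper name `looksLikeREVMask` and its simplified
signature are illustrative only (the real check is `isREVMask` in the diff
below, which also infers the block width from the first defined mask index);
the mask values come from the new postlegalizer-combiner-rev.mir test.

  #include <cstdio>
  #include <vector>

  // Sketch of the REV mask test. Element i of a reversed block must satisfy
  //   M[i] == (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)
  static bool looksLikeREVMask(const std::vector<int> &M, unsigned EltSize,
                               unsigned BlockSize) {
    if (BlockSize <= EltSize)
      return false;
    unsigned BlockElts = BlockSize / EltSize;
    for (unsigned i = 0; i < M.size(); ++i) {
      if (M[i] < 0)
        continue; // Undef index: be optimistic, as the combiner is.
      if (unsigned(M[i]) !=
          (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
        return false;
    }
    return true;
  }

  int main() {
    // <2 x s32> shuffle with mask (1, 0): the two 32-bit elements of one
    // 64-bit block are swapped, i.e. a rev64. Prints 1.
    std::printf("%d\n", looksLikeREVMask({1, 0}, /*EltSize=*/32, /*BlockSize=*/64));
    // Mask (1, 3) pulls element 3 from the second source vector. Prints 0.
    std::printf("%d\n", looksLikeREVMask({1, 3}, /*EltSize=*/32, /*BlockSize=*/64));
  }

The second call corresponds to the no_rev64_mask_1 case in the new test:
index 3 comes from the second source vector, so the rev combine is rejected
and the zip combine fires instead.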

This also does a little cleanup and adds a struct for shuffle vector pseudo
matchdata. This lets us keep using `applyShuffleVectorPseudo` rather than
adding a new apply function.
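
To make the shape of that matchdata concrete, here is a rough sketch using
plain standard-library types as stand-ins for LLVM's `Register` and `SrcOp`
(not the real API, just the idea): the match step records an opcode, a
destination, and however many sources the pseudo takes, and a single apply
step replays it.

  #include <cstdio>
  #include <vector>

  using Register = unsigned;

  // Stand-in for the matchdata struct: everything the apply step needs.
  struct ShuffleVectorPseudo {
    unsigned Opc;                // Pseudo to build, e.g. G_REV64 or G_ZIP1.
    Register Dst;                // Destination of the G_SHUFFLE_VECTOR.
    std::vector<Register> Srcs;  // One source for rev, two for zip/uzp.
  };

  // One apply routine covers every shuffle pseudo: it builds whatever the
  // match step recorded, so adding rev needs no new apply function.
  static void applyShuffleVectorPseudo(const ShuffleVectorPseudo &Info) {
    std::printf("build opc %u -> %%%u with %zu source(s)\n", Info.Opc,
                Info.Dst, Info.Srcs.size());
  }

  int main() {
    applyShuffleVectorPseudo({/*Opc=*/1, /*Dst=*/10, {11}});     // rev-style
    applyShuffleVectorPseudo({/*Opc=*/2, /*Dst=*/20, {21, 22}}); // zip-style
  }

The real struct uses a SmallVector<SrcOp, 2> for the sources, so the common
one- and two-source cases avoid a heap allocation.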

The shared matchdata should also make it a bit easier to port some of the
other masks from AArch64ISelLowering (e.g. `isZIP_v_undef_Mask` and friends).

Differential Revision: https://reviews.llvm.org/D81112

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-rev.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/AArch64InstrGISel.td
    llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 21d4450d43a3..0281f4ffce99 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -24,17 +24,26 @@ def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
 }
 
-def zip_matchdata : GIDefMatchData<"unsigned">;
+// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
+// target-specific opcode.
+def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
+
+def rev : GICombineRule<
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchREV(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
 def zip : GICombineRule<
-  (defs root:$root, zip_matchdata:$matchinfo),
+  (defs root:$root, shuffle_matchdata:$matchinfo),
   (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
          [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
-def uzp_matchdata : GIDefMatchData<"unsigned">;
 def uzp : GICombineRule<
-  (defs root:$root, uzp_matchdata:$matchinfo),
+  (defs root:$root, shuffle_matchdata:$matchinfo),
   (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
          [{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
@@ -42,7 +51,7 @@ def uzp : GICombineRule<
 
 // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
 // instruction.
-def shuffle_vector_pseudos : GICombineGroup<[zip, uzp]>;
+def shuffle_vector_pseudos : GICombineGroup<[rev, zip, uzp]>;
 
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 650b0eee53c8..a4cb99005188 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -25,6 +25,27 @@ def G_ADD_LOW : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+// Pseudo for a rev16 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV16 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+}
+
+// Pseudo for a rev32 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV32 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+}
+
+// Pseudo for a rev64 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV64 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+}
+
 // Represents an uzp1 instruction. Produced post-legalization from
 // G_SHUFFLE_VECTORs with appropriate masks.
 def G_UZP1 : AArch64GenericInstruction {
@@ -53,6 +74,9 @@ def G_ZIP2 : AArch64GenericInstruction {
   let InOperandList = (ins type0:$v1, type0:$v2);
 }
 
+def : GINodeEquiv<G_REV16, AArch64rev16>;
+def : GINodeEquiv<G_REV32, AArch64rev32>;
+def : GINodeEquiv<G_REV64, AArch64rev64>;
 def : GINodeEquiv<G_UZP1, AArch64uzp1>;
 def : GINodeEquiv<G_UZP2, AArch64uzp2>;
 def : GINodeEquiv<G_ZIP1, AArch64zip1>;

diff  --git a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
index baafe080764b..99f77b2e65b7 100644
--- a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
@@ -28,6 +28,48 @@
 
 using namespace llvm;
 
+/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
+///
+/// Used for matching target-supported shuffles before codegen.
+struct ShuffleVectorPseudo {
+  unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
+  Register Dst; ///< Destination register.
+  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
+  ShuffleVectorPseudo(unsigned Opc, Register Dst,
+                      std::initializer_list<SrcOp> SrcOps)
+      : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
+  ShuffleVectorPseudo() {}
+};
+
+/// Check if a vector shuffle corresponds to a REV instruction with the
+/// specified blocksize.
+static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
+                      unsigned BlockSize) {
+  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+         "Only possible block sizes for REV are: 16, 32, 64");
+  assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
+
+  unsigned BlockElts = M[0] + 1;
+
+  // If the first shuffle index is UNDEF, be optimistic.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSize;
+
+  if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
+    return false;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    // Ignore undef indices.
+    if (M[i] < 0)
+      continue;
+    if (static_cast<unsigned>(M[i]) !=
+        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+      return false;
+  }
+
+  return true;
+}
+
 /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
 /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
 static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
@@ -62,41 +104,78 @@ static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
   return true;
 }
 
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
+/// G_REV instruction. Fills \p MatchInfo with the G_REV opcode and operands.
+static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(Dst);
+  unsigned EltSize = Ty.getScalarSizeInBits();
+
+  // Element size for a rev cannot be 64.
+  if (EltSize == 64)
+    return false;
+
+  unsigned NumElts = Ty.getNumElements();
+
+  // Try to produce G_REV64
+  if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
+    MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
+    return true;
+  }
+
+  // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
+  // This should be identical to above, but with a constant 32 and constant
+  // 16.
+  return false;
+}
+
 /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
 /// a G_UZP1 or G_UZP2 instruction.
 ///
 /// \param [in] MI - The shuffle vector instruction.
 /// \param [out] Opc - Either G_UZP1 or G_UZP2 on success.
 static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     unsigned &Opc) {
+                     ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
   unsigned WhichResult;
   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
-  unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+  Register Dst = MI.getOperand(0).getReg();
+  unsigned NumElts = MRI.getType(Dst).getNumElements();
   if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
     return false;
-  Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
   return true;
 }
 
 static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     unsigned &Opc) {
+                     ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
   unsigned WhichResult;
   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
-  unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+  Register Dst = MI.getOperand(0).getReg();
+  unsigned NumElts = MRI.getType(Dst).getNumElements();
   if (!isZipMask(ShuffleMask, NumElts, WhichResult))
     return false;
-  Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
   return true;
 }
 
 /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
 /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
-static bool applyShuffleVectorPseudo(MachineInstr &MI, unsigned Opc) {
+static bool applyShuffleVectorPseudo(MachineInstr &MI,
+                                     ShuffleVectorPseudo &MatchInfo) {
   MachineIRBuilder MIRBuilder(MI);
-  MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
-                        {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
   MI.eraseFromParent();
   return true;
 }

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-rev.mir
new file mode 100644
index 000000000000..0f6653d94b32
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-rev.mir
@@ -0,0 +1,85 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test producing a G_REV from an appropriate G_SHUFFLE_VECTOR.
+
+...
+---
+name:            rev64_mask_1_0
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: rev64_mask_1_0
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
+    ; CHECK: $d0 = COPY [[REV64_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 0)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            rev64_mask_1_undef
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: rev64_mask_1_undef
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
+    ; CHECK: $d0 = COPY [[REV64_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, undef)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:           no_rev64_mask_1
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d0, $d1
+
+    ; Verify that we don't produce a G_REV64 when
+    ;
+    ; M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)
+    ; In this example, BlockElts = 2
+    ;
+    ; At i = 1
+    ; M[i] = 3
+    ; i % BlockElts = i % 2 = 1
+    ;
+    ; So
+    ;
+    ; 3 != (1 - 1) + (2 - 1 - 1)
+    ; 3 != 0
+    ;
+    ; And so we should not produce a G_REV64.
+    ;
+    ; CHECK-LABEL: name: no_rev64_mask_1
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]]
+    ; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s32>) = COPY $d1
+    %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3)
+    $d0 = COPY %2(<2 x s32>)
+    RET_ReallyLR implicit $d0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir
new file mode 100644
index 000000000000..d3dd8a97e6f4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-rev.mir
@@ -0,0 +1,239 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test selecting G_REV instructions.
+#
+# Each test is named like:
+#
+# (G_REV_VERSION)_(INSTRUCTION_PRODUCED)
+#
+# Each of these patterns comes from AArch64GenGlobalISel.inc.
+#
+
+...
+---
+name:            rev64_REV64v2i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev64_REV64v2i32
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV64v2i32 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<2 x s32>) = COPY $d0
+    %rev:fpr(<2 x s32>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev64_REV64v4i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev64_REV64v4i16
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV64v4i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %rev:fpr(<4 x s16>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev64_REV64v4i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v4i32
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV64v4i32 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<4 x s32>) = COPY $q0
+    %rev:fpr(<4 x s32>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev64_REV64v8i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v8i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV64v8i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %rev:fpr(<8 x s8>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev64_REV64v8i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v8i16
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV64v8i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %rev:fpr(<8 x s16>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev64_REV64v16i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev64_REV64v16i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV64v16i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %rev:fpr(<16 x s8>) = G_REV64 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev32_REV32v4i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev32_REV32v4i16
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV32v4i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %rev:fpr(<4 x s16>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev32_REV32v8i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $d0
+    ; CHECK-LABEL: name: rev32_REV32v8i8
+    ; CHECK: liveins: $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV32v8i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %rev:fpr(<8 x s8>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev32_REV32v8i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev32_REV32v8i16
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV32v8i16 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %rev:fpr(<8 x s16>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev32_REV32v16i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev32_REV32v16i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV32v16i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %rev:fpr(<16 x s8>) = G_REV32 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev16_REV16v8i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev16_REV16v8i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: %rev:fpr64 = REV16v8i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %rev:fpr(<8 x s8>) = G_REV16 %copy
+    RET_ReallyLR
+
+...
+---
+name:            rev16_REV16v16i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: rev16_REV16v16i8
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: %rev:fpr128 = REV16v16i8 %copy
+    ; CHECK: RET_ReallyLR
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %rev:fpr(<16 x s8>) = G_REV16 %copy
+    RET_ReallyLR


        

