[llvm] 7f7993e - [ARM] Expand distributing increments to also handle existing pre/post inc instructions.

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 17 08:58:52 PDT 2020


Author: David Green
Date: 2020-09-17T16:58:35+01:00
New Revision: 7f7993e0daf459c308747f034e3fbd73889c7ab3

URL: https://github.com/llvm/llvm-project/commit/7f7993e0daf459c308747f034e3fbd73889c7ab3
DIFF: https://github.com/llvm/llvm-project/commit/7f7993e0daf459c308747f034e3fbd73889c7ab3.diff

LOG: [ARM] Expand distributing increments to also handle existing pre/post inc instructions.

This extends the postinc distribution code in the load/store optimizer
to also handle the case where there is an existing pre/post inc
instruction, so that subsequent instructions can be modified to use the
adjusted offset from the increment. This saves having to keep the old
base register live past the increment instruction.

Differential Revision: https://reviews.llvm.org/D83377
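
As a simplified sketch of the new case (drawn from the first
mve-postinc-distribute.mir change below, with virtual register numbers and
trailing operands elided), an access that previously had to keep the old base
register live past the post-inc:

    %1:rgpr, %2:mqpr = MVE_VLDRWU32_post %0, 32, ...
    %3:mqpr = MVE_VLDRWU32 %0, 16, ...

is now rebased onto the incremented register instead:

    %1:rgpr, %2:mqpr = MVE_VLDRWU32_post %0, 32, ...
    %3:mqpr = MVE_VLDRWU32 %1, -16, ...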

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
    llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir
    llvm/test/CodeGen/Thumb2/mve-vst2.ll
    llvm/test/CodeGen/Thumb2/mve-vst3.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 09bb3b3c6f72..a5da50608087 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2570,10 +2570,85 @@ static int getBaseOperandIndex(MachineInstr &MI) {
   case ARM::t2STRHi8:
   case ARM::t2STRHi12:
     return 1;
+  case ARM::MVE_VLDRBS16_post:
+  case ARM::MVE_VLDRBS32_post:
+  case ARM::MVE_VLDRBU16_post:
+  case ARM::MVE_VLDRBU32_post:
+  case ARM::MVE_VLDRHS32_post:
+  case ARM::MVE_VLDRHU32_post:
+  case ARM::MVE_VLDRBU8_post:
+  case ARM::MVE_VLDRHU16_post:
+  case ARM::MVE_VLDRWU32_post:
+  case ARM::MVE_VSTRB16_post:
+  case ARM::MVE_VSTRB32_post:
+  case ARM::MVE_VSTRH32_post:
+  case ARM::MVE_VSTRBU8_post:
+  case ARM::MVE_VSTRHU16_post:
+  case ARM::MVE_VSTRWU32_post:
+  case ARM::MVE_VLDRBS16_pre:
+  case ARM::MVE_VLDRBS32_pre:
+  case ARM::MVE_VLDRBU16_pre:
+  case ARM::MVE_VLDRBU32_pre:
+  case ARM::MVE_VLDRHS32_pre:
+  case ARM::MVE_VLDRHU32_pre:
+  case ARM::MVE_VLDRBU8_pre:
+  case ARM::MVE_VLDRHU16_pre:
+  case ARM::MVE_VLDRWU32_pre:
+  case ARM::MVE_VSTRB16_pre:
+  case ARM::MVE_VSTRB32_pre:
+  case ARM::MVE_VSTRH32_pre:
+  case ARM::MVE_VSTRBU8_pre:
+  case ARM::MVE_VSTRHU16_pre:
+  case ARM::MVE_VSTRWU32_pre:
+    return 2;
   }
   return -1;
 }
 
+static bool isPostIndex(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::MVE_VLDRBS16_post:
+  case ARM::MVE_VLDRBS32_post:
+  case ARM::MVE_VLDRBU16_post:
+  case ARM::MVE_VLDRBU32_post:
+  case ARM::MVE_VLDRHS32_post:
+  case ARM::MVE_VLDRHU32_post:
+  case ARM::MVE_VLDRBU8_post:
+  case ARM::MVE_VLDRHU16_post:
+  case ARM::MVE_VLDRWU32_post:
+  case ARM::MVE_VSTRB16_post:
+  case ARM::MVE_VSTRB32_post:
+  case ARM::MVE_VSTRH32_post:
+  case ARM::MVE_VSTRBU8_post:
+  case ARM::MVE_VSTRHU16_post:
+  case ARM::MVE_VSTRWU32_post:
+    return true;
+  }
+  return false;
+}
+
+static bool isPreIndex(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::MVE_VLDRBS16_pre:
+  case ARM::MVE_VLDRBS32_pre:
+  case ARM::MVE_VLDRBU16_pre:
+  case ARM::MVE_VLDRBU32_pre:
+  case ARM::MVE_VLDRHS32_pre:
+  case ARM::MVE_VLDRHU32_pre:
+  case ARM::MVE_VLDRBU8_pre:
+  case ARM::MVE_VLDRHU16_pre:
+  case ARM::MVE_VLDRWU32_pre:
+  case ARM::MVE_VSTRB16_pre:
+  case ARM::MVE_VSTRB32_pre:
+  case ARM::MVE_VSTRH32_pre:
+  case ARM::MVE_VSTRBU8_pre:
+  case ARM::MVE_VSTRHU16_pre:
+  case ARM::MVE_VSTRWU32_pre:
+    return true;
+  }
+  return false;
+}
+
 // Given a memory access Opcode, check that the given Imm would be a valid Offset
 // for this instruction (same as isLegalAddressImm), or if the instruction
 // could be easily converted to one where that was valid. For example converting
@@ -2703,19 +2778,26 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
 }
 
 // Given a Base Register, optimise the load/store uses to attempt to create more
-// post-inc accesses. We do this by taking zero offset loads/stores with an add,
-// and convert them to a postinc load/store of the same type. Any subsequent
-// accesses will be adjusted to use and account for the post-inc value.
+// post-inc accesses and fewer register moves. We do this by taking zero offset
+// loads/stores with an add, and convert them to a postinc load/store of the
+// same type. Any subsequent accesses will be adjusted to use and account for
+// the post-inc value.
 // For example:
 // LDR #0            LDR_POSTINC #16
 // LDR #4            LDR #-12
 // LDR #8            LDR #-8
 // LDR #12           LDR #-4
 // ADD #16
+//
+// At the same time if we do not find an increment but do find an existing
+// pre/post inc instruction, we can still adjust the offsets of subsequent
+// instructions to save the register move that would otherwise be needed for the
+// in-place increment.
 bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
   // We are looking for:
   // One zero offset load/store that can become postinc
   MachineInstr *BaseAccess = nullptr;
+  MachineInstr *PrePostInc = nullptr;
   // An increment that can be folded in
   MachineInstr *Increment = nullptr;
   // Other accesses after BaseAccess that will need to be updated to use the
@@ -2734,40 +2816,62 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
     if (!Use.getOperand(BaseOp).isReg() ||
         Use.getOperand(BaseOp).getReg() != Base)
       return false;
-    if (Use.getOperand(BaseOp + 1).getImm() == 0)
+    if (isPreIndex(Use) || isPostIndex(Use))
+      PrePostInc = &Use;
+    else if (Use.getOperand(BaseOp + 1).getImm() == 0)
       BaseAccess = &Use;
     else
       OtherAccesses.insert(&Use);
   }
 
-  if (!BaseAccess || !Increment ||
-      BaseAccess->getParent() != Increment->getParent())
-    return false;
-  Register PredReg;
-  if (Increment->definesRegister(ARM::CPSR) ||
-      getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
-    return false;
+  int IncrementOffset;
+  Register NewBaseReg;
+  if (BaseAccess && Increment) {
+    if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
+      return false;
+    Register PredReg;
+    if (Increment->definesRegister(ARM::CPSR) ||
+        getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
+      return false;
+
+    LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
+                      << Base.virtRegIndex() << "\n");
 
-  LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
-                    << Base.virtRegIndex() << "\n");
+    // Make sure that Increment has no uses before BaseAccess.
+    for (MachineInstr &Use :
+        MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
+      if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+        LLVM_DEBUG(dbgs() << "  BaseAccess doesn't dominate use of increment\n");
+        return false;
+      }
+    }
 
-  // Make sure that Increment has no uses before BaseAccess.
-  for (MachineInstr &Use :
-       MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
-    if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
-      LLVM_DEBUG(dbgs() << "  BaseAccess doesn't dominate use of increment\n");
+    // Make sure that Increment can be folded into Base
+    IncrementOffset = getAddSubImmediate(*Increment);
+    unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
+        BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
+    if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
+      LLVM_DEBUG(dbgs() << "  Illegal addressing mode immediate on postinc\n");
       return false;
     }
   }
+  else if (PrePostInc) {
+    // If we already have a pre/post index load/store then set BaseAccess,
+    // IncrementOffset and NewBaseReg to the values it already produces,
+    // allowing us to update any subsequent uses of the BaseOp reg with the
+    // incremented value.
+    if (Increment)
+      return false;
 
-  // Make sure that Increment can be folded into Base
-  int IncrementOffset = getAddSubImmediate(*Increment);
-  unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
-      BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
-  if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
-    LLVM_DEBUG(dbgs() << "  Illegal addressing mode immediate on postinc\n");
-    return false;
+    LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
+                      << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
+    int BaseOp = getBaseOperandIndex(*PrePostInc);
+    IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
+    BaseAccess = PrePostInc;
+    NewBaseReg = PrePostInc->getOperand(0).getReg();
   }
+  else
+    return false;
 
   // And make sure that the negative value of increment can be added to all
   // other offsets after the BaseAccess. We rely on either
@@ -2801,16 +2905,18 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
     return false;
   }
 
-  // Replace BaseAccess with a post inc
-  LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
-  LLVM_DEBUG(dbgs() << "  And   : "; Increment->dump());
-  Register NewBaseReg = Increment->getOperand(0).getReg();
-  MachineInstr *BaseAccessPost =
-      createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
-  BaseAccess->eraseFromParent();
-  Increment->eraseFromParent();
-  (void)BaseAccessPost;
-  LLVM_DEBUG(dbgs() << "  To    : "; BaseAccessPost->dump());
+  if (!PrePostInc) {
+    // Replace BaseAccess with a post inc
+    LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
+    LLVM_DEBUG(dbgs() << "  And   : "; Increment->dump());
+    NewBaseReg = Increment->getOperand(0).getReg();
+    MachineInstr *BaseAccessPost =
+        createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
+    BaseAccess->eraseFromParent();
+    Increment->eraseFromParent();
+    (void)BaseAccessPost;
+    LLVM_DEBUG(dbgs() << "  To    : "; BaseAccessPost->dump());
+  }
 
   for (auto *Use : SuccessorAccesses) {
     LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
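
In effect, once an existing pre/post inc instruction is found for the base,
each remaining access at Base+Offset is rewritten to use the incremented
register with an offset of Offset - IncrementOffset. For an increment of 32,
as in the updated mve-postinc-distribute.mir checks below:

    16 - 32 = -16
   -16 - 32 = -48
    34 - 32 =   2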

diff  --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir
index d4ac622f0ffd..77ca49378e63 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir
@@ -919,7 +919,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
     ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
@@ -947,7 +947,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
     ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRHU16_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
@@ -975,7 +975,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
     ; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBU8_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:gprnopc = COPY $r0
@@ -1003,7 +1003,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_post1:%[0-9]+]]:mqpr = MVE_VLDRBS32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBS32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1031,7 +1031,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_post1:%[0-9]+]]:mqpr = MVE_VLDRBU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBU32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1059,7 +1059,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRHS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_post1:%[0-9]+]]:mqpr = MVE_VLDRHS32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRHS32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1087,7 +1087,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRHU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_post1:%[0-9]+]]:mqpr = MVE_VLDRHU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRHU32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1115,7 +1115,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBS16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_post1:%[0-9]+]]:mqpr = MVE_VLDRBS16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBS16_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1143,7 +1143,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBU16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_post1:%[0-9]+]]:mqpr = MVE_VLDRBU16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_post]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBU16_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1172,7 +1172,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VSTRWU32_post:%[0-9]+]]:rgpr = MVE_VSTRWU32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRWU32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_post]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRWU32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1202,7 +1202,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VSTRHU16_post:%[0-9]+]]:rgpr = MVE_VSTRHU16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRHU16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_post]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRHU16_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1232,7 +1232,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRBU8 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_post]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRBU8_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1262,7 +1262,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRH32_post:%[0-9]+]]:tgpr = MVE_VSTRH32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRH32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_post]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRH32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1292,7 +1292,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRB32_post:%[0-9]+]]:tgpr = MVE_VSTRB32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_post]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRB32_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1322,7 +1322,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRB16_post:%[0-9]+]]:tgpr = MVE_VSTRB16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_post]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRB16_post]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1351,7 +1351,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VLDRWU32_pre:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRWU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRWU32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:rgpr = COPY $r0
@@ -1379,7 +1379,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VLDRHU16_pre:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRHU16_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:rgpr = COPY $r0
@@ -1407,7 +1407,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBU8_pre:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU8_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBU8_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:rgpr = COPY $r0
@@ -1435,7 +1435,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBS32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1463,7 +1463,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBU32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1491,7 +1491,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRHS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHS32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRHS32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1519,7 +1519,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRHU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRHU32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1547,7 +1547,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBS16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBS16_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1575,7 +1575,7 @@ body:             |
     ; CHECK: liveins: $r0, $q0
     ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VLDRBU16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_pre]], -16, 0, $noreg :: (load 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VLDRBU16_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0:tgpr = COPY $r0
@@ -1604,7 +1604,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VSTRWU32_pre:%[0-9]+]]:rgpr = MVE_VSTRWU32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRWU32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_pre]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRWU32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1634,7 +1634,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VSTRHU16_pre:%[0-9]+]]:rgpr = MVE_VSTRHU16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRHU16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_pre]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRHU16_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1664,7 +1664,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
     ; CHECK: [[MVE_VSTRBU8_pre:%[0-9]+]]:rgpr = MVE_VSTRBU8_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRBU8 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_pre]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRBU8_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1694,7 +1694,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRH32_pre:%[0-9]+]]:tgpr = MVE_VSTRH32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRH32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_pre]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRH32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1724,7 +1724,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRB32_pre:%[0-9]+]]:tgpr = MVE_VSTRB32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_pre]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRB32_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1754,7 +1754,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1784,9 +1784,9 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], -16, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 34, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -48, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], 2, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0
@@ -1820,7 +1820,7 @@ body:             |
     ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
     ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
     ; CHECK: [[MVE_VSTRB16_pre1:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 64, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre1]], -48, 0, $noreg :: (store 16, align 8)
     ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre1]]
     ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
     %1:mqpr = COPY $q0

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vst2.ll b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
index 9b68f7d4c074..b815ed24ae26 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
@@ -303,25 +303,24 @@ define void @vst2_v4i64(<4 x i64> *%src, <8 x i64> *%dst) {
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    vldrw.u32 q2, [r0, #32]
 ; CHECK-NEXT:    vldrw.u32 q4, [r0, #48]
+; CHECK-NEXT:    vldrw.u32 q2, [r0, #32]
 ; CHECK-NEXT:    vmov.f64 d6, d1
-; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    vmov.f64 d10, d3
 ; CHECK-NEXT:    vmov.f32 s13, s3
 ; CHECK-NEXT:    vmov.f32 s21, s7
+; CHECK-NEXT:    vmov.f32 s2, s16
 ; CHECK-NEXT:    vmov.f32 s6, s8
 ; CHECK-NEXT:    vmov.f32 s14, s18
 ; CHECK-NEXT:    vmov.f32 s22, s10
-; CHECK-NEXT:    vmov.f32 s2, s16
-; CHECK-NEXT:    vmov.f32 s7, s9
-; CHECK-NEXT:    vmov.f32 s23, s11
-; CHECK-NEXT:    vstrb.8 q1, [r0], #48
-; CHECK-NEXT:    vmov.f32 s15, s19
-; CHECK-NEXT:    vstrw.32 q5, [r1, #16]
 ; CHECK-NEXT:    vmov.f32 s3, s17
-; CHECK-NEXT:    vstrw.32 q3, [r0]
+; CHECK-NEXT:    vmov.f32 s7, s9
 ; CHECK-NEXT:    vstrw.32 q0, [r1, #32]
+; CHECK-NEXT:    vmov.f32 s15, s19
+; CHECK-NEXT:    vstrb.8 q1, [r1], #48
+; CHECK-NEXT:    vmov.f32 s23, s11
+; CHECK-NEXT:    vstrw.32 q3, [r1]
+; CHECK-NEXT:    vstrw.32 q5, [r1, #-32]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    bx lr
 entry:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 52de7a45e85b..600c5279ca91 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -1085,7 +1085,6 @@ define void @vst3_v2i64(<2 x i64> *%src, <6 x i64> *%dst) {
 ; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
 ; CHECK-NEXT:    vldrw.u32 q0, [r0, #32]
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    vmov.f64 d6, d5
 ; CHECK-NEXT:    vmov.f32 s13, s11
 ; CHECK-NEXT:    vmov.f32 s14, s2
@@ -1093,10 +1092,10 @@ define void @vst3_v2i64(<2 x i64> *%src, <6 x i64> *%dst) {
 ; CHECK-NEXT:    vmov.f32 s2, s6
 ; CHECK-NEXT:    vmov.f32 s3, s7
 ; CHECK-NEXT:    vmov.f32 s6, s8
-; CHECK-NEXT:    vstrw.32 q0, [r1, #16]
 ; CHECK-NEXT:    vmov.f32 s7, s9
-; CHECK-NEXT:    vstrb.8 q1, [r0], #32
-; CHECK-NEXT:    vstrw.32 q3, [r0]
+; CHECK-NEXT:    vstrb.8 q1, [r1], #32
+; CHECK-NEXT:    vstrw.32 q3, [r1]
+; CHECK-NEXT:    vstrw.32 q0, [r1, #-16]
 ; CHECK-NEXT:    bx lr
 entry:
   %s1 = getelementptr <2 x i64>, <2 x i64>* %src, i32 0


        

