[llvm] [ARM] Do not assume alignment in vld1xN and vst1xN intrinsics (PR #106984)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 2 06:06:27 PDT 2024


https://github.com/nikic created https://github.com/llvm/llvm-project/pull/106984

These intrinsics currently assume natural alignment. Instead, respect the alignment attribute on the intrinsic. Teach InstCombine to improve that alignment.

If desired I could also adjust the clang frontend to add alignment annotations equivalent to the previous behavior, but I don't see any indication that such an assumption is correct in the ARM intrinsics docs.

Fixes https://github.com/llvm/llvm-project/issues/59081.

>From 83e3409c8d252c0dc9f00b708532c6069417c12a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Mon, 2 Sep 2024 14:12:40 +0200
Subject: [PATCH] [ARM] Do not assume alignment in vld1xN and vst1xN intrinsics

These intrinsics currently assume natural alignment. Instead,
respect the alignment attribute on the intrinsic. Teach InstCombine
to improve that alignment.

If desired I could also adjust the clang frontend to add alignment
annotations equivalent to the previous behavior, but I don't see
any indication that such an assumption is correct in the ARM
intrinsics docs.

Fixes https://github.com/llvm/llvm-project/issues/59081.
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   4 +-
 .../lib/Target/ARM/ARMTargetTransformInfo.cpp |  16 ++
 llvm/test/CodeGen/ARM/arm-vld1.ll             | 182 ++++++++++-------
 llvm/test/CodeGen/ARM/arm-vst1.ll             | 193 ++++++++++--------
 .../test/CodeGen/ARM/bf16-intrinsics-ld-st.ll |  32 +--
 .../InstCombine/ARM/neon-intrinsics.ll        |   6 +-
 6 files changed, 256 insertions(+), 177 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9096617a948557..aa663556deb760 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21073,7 +21073,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
     Info.offset = 0;
-    Info.align.reset();
+    Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
     // volatile loads with NEON intrinsics not supported
     Info.flags = MachineMemOperand::MOLoad;
     return true;
@@ -21120,7 +21120,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align.reset();
+    Info.align = I.getParamAlign(0).valueOrOne();
     // volatile stores with NEON intrinsics not supported
     Info.flags = MachineMemOperand::MOStore;
     return true;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 912569a8fec118..9b5349241c341b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -163,6 +163,22 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     break;
   }
 
+  case Intrinsic::arm_neon_vld1x2:
+  case Intrinsic::arm_neon_vld1x3:
+  case Intrinsic::arm_neon_vld1x4:
+  case Intrinsic::arm_neon_vst1x2:
+  case Intrinsic::arm_neon_vst1x3:
+  case Intrinsic::arm_neon_vst1x4: {
+    Align NewAlign =
+        getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
+                          &IC.getAssumptionCache(), &IC.getDominatorTree());
+    Align OldAlign = II.getParamAlign(0).valueOrOne();
+    if (NewAlign > OldAlign)
+      II.addParamAttr(0,
+                      Attribute::getWithAlignment(II.getContext(), NewAlign));
+    break;
+  }
+
   case Intrinsic::arm_mve_pred_i2v: {
     Value *Arg = II.getArgOperand(0);
     Value *ArgArg;
diff --git a/llvm/test/CodeGen/ARM/arm-vld1.ll b/llvm/test/CodeGen/ARM/arm-vld1.ll
index 78b0b92013c397..ec2793589759ea 100644
--- a/llvm/test/CodeGen/ARM/arm-vld1.ll
+++ b/llvm/test/CodeGen/ARM/arm-vld1.ll
@@ -68,7 +68,7 @@ declare %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0(ptr) nounwind readon
 
 define %struct.uint16x4x2_t @test_vld1_u16_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x2:
-; CHECK:         vld1.16 {d16, d17}, [r0:64]
+; CHECK:         vld1.16 {d16, d17}, [r0]
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
@@ -76,9 +76,39 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2(ptr %a) nounwind {
   ret %struct.uint16x4x2_t %tmp
 }
 
+define %struct.uint16x4x2_t @test_vld1_u16_x2_align8(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u16_x2_align8:
+; CHECK:         vld1.16 {d16, d17}, [r0:64]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
+  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 %a)
+  ret %struct.uint16x4x2_t %tmp
+}
+
+define %struct.uint16x4x2_t @test_vld1_u16_x2_align16(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u16_x2_align16:
+; CHECK:         vld1.16 {d16, d17}, [r0:128]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
+  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 %a)
+  ret %struct.uint16x4x2_t %tmp
+}
+
+define %struct.uint16x4x2_t @test_vld1_u16_x2_align32(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u16_x2_align32:
+; CHECK:         vld1.16 {d16, d17}, [r0:128]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
+  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 32 %a)
+  ret %struct.uint16x4x2_t %tmp
+}
+
 define %struct.uint16x4x3_t @test_vld1_u16_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x3:
-; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]
+; CHECK:         vld1.16 {d16, d17, d18}, [r1]
 ; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
 ; CHECK-NEXT:    vstr d18, [r0]
@@ -89,7 +119,7 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3(ptr %a) nounwind {
 
 define %struct.uint16x4x4_t @test_vld1_u16_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x4:
-; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
 ; CHECK-NEXT:    vst1.16 {d18}, [r0:64]!
@@ -101,7 +131,7 @@ define %struct.uint16x4x4_t @test_vld1_u16_x4(ptr %a) nounwind {
 
 define %struct.uint32x2x2_t @test_vld1_u32_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x2:
-; CHECK:         vld1.32 {d16, d17}, [r0:64]
+; CHECK:         vld1.32 {d16, d17}, [r0]
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
@@ -111,7 +141,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2(ptr %a) nounwind {
 
 define %struct.uint32x2x3_t @test_vld1_u32_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x3:
-; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]
+; CHECK:         vld1.32 {d16, d17, d18}, [r1]
 ; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
 ; CHECK-NEXT:    vstr d18, [r0]
@@ -122,7 +152,7 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3(ptr %a) nounwind {
 
 define %struct.uint32x2x4_t @test_vld1_u32_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x4:
-; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
 ; CHECK-NEXT:    vst1.32 {d18}, [r0:64]!
@@ -134,7 +164,7 @@ define %struct.uint32x2x4_t @test_vld1_u32_x4(ptr %a) nounwind {
 
 define %struct.uint64x1x2_t @test_vld1_u64_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x2:
-; CHECK:         vld1.64 {d16, d17}, [r0:64]
+; CHECK:         vld1.64 {d16, d17}, [r0]
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
@@ -144,7 +174,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2(ptr %a) nounwind {
 
 define %struct.uint64x1x3_t @test_vld1_u64_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x3:
-; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]
+; CHECK:         vld1.64 {d16, d17, d18}, [r1]
 ; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
 ; CHECK-NEXT:    vstr d18, [r0]
@@ -155,7 +185,7 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3(ptr %a) nounwind {
 
 define %struct.uint64x1x4_t @test_vld1_u64_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x4:
-; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
 ; CHECK-NEXT:    vst1.64 {d18}, [r0:64]!
@@ -167,7 +197,7 @@ define %struct.uint64x1x4_t @test_vld1_u64_x4(ptr %a) nounwind {
 
 define %struct.uint8x8x2_t @test_vld1_u8_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x2:
-; CHECK:         vld1.8 {d16, d17}, [r0:64]
+; CHECK:         vld1.8 {d16, d17}, [r0]
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
@@ -177,7 +207,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2(ptr %a) nounwind {
 
 define %struct.uint8x8x3_t @test_vld1_u8_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x3:
-; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]
+; CHECK:         vld1.8 {d16, d17, d18}, [r1]
 ; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
 ; CHECK-NEXT:    vstr d18, [r0]
@@ -188,7 +218,7 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3(ptr %a) nounwind {
 
 define %struct.uint8x8x4_t @test_vld1_u8_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x4:
-; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
 ; CHECK-NEXT:    vst1.8 {d18}, [r0:64]!
@@ -200,7 +230,7 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4(ptr %a) nounwind {
 
 define %struct.uint16x8x2_t @test_vld1q_u16_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u16_x2:
-; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
 ; CHECK-NEXT:    bx lr
@@ -210,8 +240,8 @@ define %struct.uint16x8x2_t @test_vld1q_u16_x2(ptr %a) nounwind {
 
 define %struct.uint16x8x3_t @test_vld1q_u16_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u16_x3:
-; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.16 {d19, d20, d21}, [r1:64]
+; CHECK:         vld1.16 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.16 {d19, d20, d21}, [r1]
 ; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
@@ -222,8 +252,8 @@ define %struct.uint16x8x3_t @test_vld1q_u16_x3(ptr %a) nounwind {
 
 define %struct.uint16x8x4_t @test_vld1q_u16_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u16_x4:
-; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.16 {d20, d21, d22, d23}, [r1:256]
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.16 {d20, d21, d22, d23}, [r1]
 ; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.16 {d20, d21}, [r0]!
@@ -235,7 +265,7 @@ define %struct.uint16x8x4_t @test_vld1q_u16_x4(ptr %a) nounwind {
 
 define %struct.uint32x4x2_t @test_vld1q_u32_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u32_x2:
-; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
 ; CHECK-NEXT:    bx lr
@@ -245,8 +275,8 @@ define %struct.uint32x4x2_t @test_vld1q_u32_x2(ptr %a) nounwind {
 
 define %struct.uint32x4x3_t @test_vld1q_u32_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u32_x3:
-; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.32 {d19, d20, d21}, [r1:64]
+; CHECK:         vld1.32 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.32 {d19, d20, d21}, [r1]
 ; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
@@ -257,8 +287,8 @@ define %struct.uint32x4x3_t @test_vld1q_u32_x3(ptr %a) nounwind {
 
 define %struct.uint32x4x4_t @test_vld1q_u32_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u32_x4:
-; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.32 {d20, d21, d22, d23}, [r1:256]
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.32 {d20, d21, d22, d23}, [r1]
 ; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.32 {d20, d21}, [r0]!
@@ -270,7 +300,7 @@ define %struct.uint32x4x4_t @test_vld1q_u32_x4(ptr %a) nounwind {
 
 define %struct.uint64x2x2_t @test_vld1q_u64_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u64_x2:
-; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
 ; CHECK-NEXT:    bx lr
@@ -280,8 +310,8 @@ define %struct.uint64x2x2_t @test_vld1q_u64_x2(ptr %a) nounwind {
 
 define %struct.uint64x2x3_t @test_vld1q_u64_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u64_x3:
-; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.64 {d19, d20, d21}, [r1:64]
+; CHECK:         vld1.64 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.64 {d19, d20, d21}, [r1]
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
@@ -292,8 +322,8 @@ define %struct.uint64x2x3_t @test_vld1q_u64_x3(ptr %a) nounwind {
 
 define %struct.uint64x2x4_t @test_vld1q_u64_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u64_x4:
-; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.64 {d20, d21, d22, d23}, [r1:256]
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.64 {d20, d21, d22, d23}, [r1]
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]!
@@ -305,7 +335,7 @@ define %struct.uint64x2x4_t @test_vld1q_u64_x4(ptr %a) nounwind {
 
 define %struct.uint8x16x2_t @test_vld1q_u8_x2(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u8_x2:
-; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1]
 ; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
 ; CHECK-NEXT:    bx lr
@@ -315,8 +345,8 @@ define %struct.uint8x16x2_t @test_vld1q_u8_x2(ptr %a) nounwind {
 
 define %struct.uint8x16x3_t @test_vld1q_u8_x3(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u8_x3:
-; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r1:64]
+; CHECK:         vld1.8 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r1]
 ; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
@@ -327,8 +357,8 @@ define %struct.uint8x16x3_t @test_vld1q_u8_x3(ptr %a) nounwind {
 
 define %struct.uint8x16x4_t @test_vld1q_u8_x4(ptr %a) nounwind {
 ; CHECK-LABEL: test_vld1q_u8_x4:
-; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r1:256]
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r1]
 ; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
 ; CHECK-NEXT:    vst1.8 {d20, d21}, [r0]!
@@ -344,7 +374,7 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_imm(ptr %a, ptr %ptr) nounwin
 ; CHECK-LABEL: test_vld1_u16_x2_post_imm:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vld1.16 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vld1.16 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -362,7 +392,7 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
 ; CHECK-NEXT:    lsl r2, r2, #1
-; CHECK-NEXT:    vld1.16 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vld1.16 {d16, d17}, [r0], r2
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -377,7 +407,7 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint16x4x3_t @test_vld1_u16_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x3_post_imm:
-; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]!
+; CHECK:         vld1.16 {d16, d17, d18}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
@@ -392,7 +422,7 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3_post_imm(ptr %a, ptr %ptr) nounwin
 define %struct.uint16x4x3_t @test_vld1_u16_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x3_post_reg:
 ; CHECK:         lsl r3, r3, #1
-; CHECK-NEXT:    vld1.16 {d16, d17, d18}, [r1:64], r3
+; CHECK-NEXT:    vld1.16 {d16, d17, d18}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
@@ -406,7 +436,7 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint16x4x4_t @test_vld1_u16_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x4_post_imm:
-; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
@@ -422,7 +452,7 @@ define %struct.uint16x4x4_t @test_vld1_u16_x4_post_imm(ptr %a, ptr %ptr) nounwin
 define %struct.uint16x4x4_t @test_vld1_u16_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x4_post_reg:
 ; CHECK:         lsl r3, r3, #1
-; CHECK-NEXT:    vld1.16 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK-NEXT:    vld1.16 {d16, d17, d18, d19}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
@@ -439,7 +469,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_imm(ptr %a, ptr %ptr) nounwin
 ; CHECK-LABEL: test_vld1_u32_x2_post_imm:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vld1.32 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vld1.32 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -457,7 +487,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
 ; CHECK-NEXT:    lsl r2, r2, #2
-; CHECK-NEXT:    vld1.32 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vld1.32 {d16, d17}, [r0], r2
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -472,7 +502,7 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint32x2x3_t @test_vld1_u32_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x3_post_imm:
-; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]!
+; CHECK:         vld1.32 {d16, d17, d18}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
@@ -487,7 +517,7 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3_post_imm(ptr %a, ptr %ptr) nounwin
 define %struct.uint32x2x3_t @test_vld1_u32_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x3_post_reg:
 ; CHECK:         lsl r3, r3, #2
-; CHECK-NEXT:    vld1.32 {d16, d17, d18}, [r1:64], r3
+; CHECK-NEXT:    vld1.32 {d16, d17, d18}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
@@ -501,7 +531,7 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint32x2x4_t @test_vld1_u32_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x4_post_imm:
-; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
@@ -517,7 +547,7 @@ define %struct.uint32x2x4_t @test_vld1_u32_x4_post_imm(ptr %a, ptr %ptr) nounwin
 define %struct.uint32x2x4_t @test_vld1_u32_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x4_post_reg:
 ; CHECK:         lsl r3, r3, #2
-; CHECK-NEXT:    vld1.32 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK-NEXT:    vld1.32 {d16, d17, d18, d19}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
@@ -534,7 +564,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_imm(ptr %a, ptr %ptr) nounwin
 ; CHECK-LABEL: test_vld1_u64_x2_post_imm:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vld1.64 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -552,7 +582,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
 ; CHECK-NEXT:    lsl r2, r2, #3
-; CHECK-NEXT:    vld1.64 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0], r2
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -567,7 +597,7 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint64x1x3_t @test_vld1_u64_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x3_post_imm:
-; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]!
+; CHECK:         vld1.64 {d16, d17, d18}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
@@ -582,7 +612,7 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3_post_imm(ptr %a, ptr %ptr) nounwin
 define %struct.uint64x1x3_t @test_vld1_u64_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x3_post_reg:
 ; CHECK:         lsl r3, r3, #3
-; CHECK-NEXT:    vld1.64 {d16, d17, d18}, [r1:64], r3
+; CHECK-NEXT:    vld1.64 {d16, d17, d18}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
@@ -596,7 +626,7 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint64x1x4_t @test_vld1_u64_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x4_post_imm:
-; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
@@ -612,7 +642,7 @@ define %struct.uint64x1x4_t @test_vld1_u64_x4_post_imm(ptr %a, ptr %ptr) nounwin
 define %struct.uint64x1x4_t @test_vld1_u64_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x4_post_reg:
 ; CHECK:         lsl r3, r3, #3
-; CHECK-NEXT:    vld1.64 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK-NEXT:    vld1.64 {d16, d17, d18, d19}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
@@ -629,7 +659,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_imm(ptr %a, ptr %ptr) nounwind
 ; CHECK-LABEL: test_vld1_u8_x2_post_imm:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vld1.8 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vld1.8 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -646,7 +676,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_reg(ptr %a, ptr %ptr, i32 %inc)
 ; CHECK-LABEL: test_vld1_u8_x2_post_reg:
 ; CHECK:         .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    vld1.8 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vld1.8 {d16, d17}, [r0], r2
 ; CHECK-NEXT:    vmov lr, r12, d16
 ; CHECK-NEXT:    str r0, [r1]
 ; CHECK-NEXT:    vmov r2, r3, d17
@@ -661,7 +691,7 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_reg(ptr %a, ptr %ptr, i32 %inc)
 
 define %struct.uint8x8x3_t @test_vld1_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x3_post_imm:
-; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]!
+; CHECK:         vld1.8 {d16, d17, d18}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
@@ -675,7 +705,7 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind
 
 define %struct.uint8x8x3_t @test_vld1_u8_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x3_post_reg:
-; CHECK:         vld1.8 {d16, d17, d18}, [r1:64], r3
+; CHECK:         vld1.8 {d16, d17, d18}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
@@ -689,7 +719,7 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3_post_reg(ptr %a, ptr %ptr, i32 %inc)
 
 define %struct.uint8x8x4_t @test_vld1_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x4_post_imm:
-; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
@@ -704,7 +734,7 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind
 
 define %struct.uint8x8x4_t @test_vld1_u8_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x4_post_reg:
-; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1], r3
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
 ; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
@@ -719,7 +749,7 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4_post_reg(ptr %a, ptr %ptr, i32 %inc)
 
 define %struct.uint16x8x2_t @test_vld1q_u16_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u16_x2_post_imm:
-; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
@@ -732,8 +762,8 @@ define %struct.uint16x8x2_t @test_vld1q_u16_x2_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint16x8x3_t @test_vld1q_u16_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u16_x3_post_imm:
-; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.16 {d19, d20, d21}, [r1:64]!
+; CHECK:         vld1.16 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.16 {d19, d20, d21}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
@@ -747,8 +777,8 @@ define %struct.uint16x8x3_t @test_vld1q_u16_x3_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint16x8x4_t @test_vld1q_u16_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u16_x4_post_imm:
-; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.16 {d20, d21, d22, d23}, [r1:256]!
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.16 {d20, d21, d22, d23}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
@@ -763,7 +793,7 @@ define %struct.uint16x8x4_t @test_vld1q_u16_x4_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint32x4x2_t @test_vld1q_u32_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u32_x2_post_imm:
-; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
@@ -776,8 +806,8 @@ define %struct.uint32x4x2_t @test_vld1q_u32_x2_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint32x4x3_t @test_vld1q_u32_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u32_x3_post_imm:
-; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.32 {d19, d20, d21}, [r1:64]!
+; CHECK:         vld1.32 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.32 {d19, d20, d21}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
@@ -791,8 +821,8 @@ define %struct.uint32x4x3_t @test_vld1q_u32_x3_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint32x4x4_t @test_vld1q_u32_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u32_x4_post_imm:
-; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.32 {d20, d21, d22, d23}, [r1:256]!
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.32 {d20, d21, d22, d23}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
@@ -807,7 +837,7 @@ define %struct.uint32x4x4_t @test_vld1q_u32_x4_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint64x2x2_t @test_vld1q_u64_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u64_x2_post_imm:
-; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
@@ -820,8 +850,8 @@ define %struct.uint64x2x2_t @test_vld1q_u64_x2_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint64x2x3_t @test_vld1q_u64_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u64_x3_post_imm:
-; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.64 {d19, d20, d21}, [r1:64]!
+; CHECK:         vld1.64 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.64 {d19, d20, d21}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
@@ -835,8 +865,8 @@ define %struct.uint64x2x3_t @test_vld1q_u64_x3_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint64x2x4_t @test_vld1q_u64_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u64_x4_post_imm:
-; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.64 {d20, d21, d22, d23}, [r1:256]!
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.64 {d20, d21, d22, d23}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
@@ -851,7 +881,7 @@ define %struct.uint64x2x4_t @test_vld1q_u64_x4_post_imm(ptr %a, ptr %ptr) nounwi
 
 define %struct.uint8x16x2_t @test_vld1q_u8_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u8_x2_post_imm:
-; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
@@ -864,8 +894,8 @@ define %struct.uint8x16x2_t @test_vld1q_u8_x2_post_imm(ptr %a, ptr %ptr) nounwin
 
 define %struct.uint8x16x3_t @test_vld1q_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u8_x3_post_imm:
-; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]!
-; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r1:64]!
+; CHECK:         vld1.8 {d16, d17, d18}, [r1]!
+; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
@@ -879,8 +909,8 @@ define %struct.uint8x16x3_t @test_vld1q_u8_x3_post_imm(ptr %a, ptr %ptr) nounwin
 
 define %struct.uint8x16x4_t @test_vld1q_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u8_x4_post_imm:
-; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
-; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r1:256]!
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1]!
+; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r1]!
 ; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
diff --git a/llvm/test/CodeGen/ARM/arm-vst1.ll b/llvm/test/CodeGen/ARM/arm-vst1.ll
index 7dacd8b0b99f98..5d0a7e9614ce99 100644
--- a/llvm/test/CodeGen/ARM/arm-vst1.ll
+++ b/llvm/test/CodeGen/ARM/arm-vst1.ll
@@ -92,7 +92,7 @@ declare void @llvm.arm.neon.vst1x4.p0.v16i8(ptr nocapture, <16 x i8>, <16 x i8>,
 
 define arm_aapcs_vfpcc void @test_vst1_u16_x2(ptr %a, %struct.uint16x4x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x2:
-; CHECK:         vst1.16 {d0, d1}, [r0:64]
+; CHECK:         vst1.16 {d0, d1}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
@@ -101,9 +101,42 @@ entry:
   ret void
 }
 
+define arm_aapcs_vfpcc void @test_vst1_u16_x2_align8(ptr %a, %struct.uint16x4x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u16_x2_align8:
+; CHECK:         vst1.16 {d0, d1}, [r0:64]
+; CHECK-NEXT:    bx lr
+entry:
+  %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
+  %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
+  tail call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 8 %a, <4 x i16> %b0, <4 x i16> %b1)
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vst1_u16_x2_align16(ptr %a, %struct.uint16x4x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u16_x2_align16:
+; CHECK:         vst1.16 {d0, d1}, [r0:128]
+; CHECK-NEXT:    bx lr
+entry:
+  %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
+  %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
+  tail call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 16 %a, <4 x i16> %b0, <4 x i16> %b1)
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vst1_u16_x2_align32(ptr %a, %struct.uint16x4x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u16_x2_align32:
+; CHECK:         vst1.16 {d0, d1}, [r0:128]
+; CHECK-NEXT:    bx lr
+entry:
+  %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
+  %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
+  tail call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 32 %a, <4 x i16> %b0, <4 x i16> %b1)
+  ret void
+}
+
 define arm_aapcs_vfpcc void @test_vst1_u16_x3(ptr %a, %struct.uint16x4x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x3:
-; CHECK:         vst1.16 {d0, d1, d2}, [r0:64]
+; CHECK:         vst1.16 {d0, d1, d2}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0
@@ -115,7 +148,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u16_x4(ptr %a, %struct.uint16x4x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x4:
-; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0
@@ -128,7 +161,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u32_x2(ptr %a, %struct.uint32x2x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x2:
-; CHECK:         vst1.32 {d0, d1}, [r0:64]
+; CHECK:         vst1.32 {d0, d1}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0
@@ -139,7 +172,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u32_x3(ptr %a, %struct.uint32x2x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x3:
-; CHECK:         vst1.32 {d0, d1, d2}, [r0:64]
+; CHECK:         vst1.32 {d0, d1, d2}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0
@@ -151,7 +184,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u32_x4(ptr %a, %struct.uint32x2x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x4:
-; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0
@@ -164,7 +197,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u64_x2(ptr %a, %struct.uint64x1x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x2:
-; CHECK:         vst1.64 {d0, d1}, [r0:64]
+; CHECK:         vst1.64 {d0, d1}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0
@@ -175,7 +208,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u64_x3(ptr %a, %struct.uint64x1x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x3:
-; CHECK:         vst1.64 {d0, d1, d2}, [r0:64]
+; CHECK:         vst1.64 {d0, d1, d2}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0
@@ -187,7 +220,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u64_x4(ptr %a, %struct.uint64x1x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x4:
-; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0
@@ -200,7 +233,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u8_x2(ptr %a, %struct.uint8x8x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x2:
-; CHECK:         vst1.8 {d0, d1}, [r0:64]
+; CHECK:         vst1.8 {d0, d1}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0
@@ -211,7 +244,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u8_x3(ptr %a, %struct.uint8x8x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x3:
-; CHECK:         vst1.8 {d0, d1, d2}, [r0:64]
+; CHECK:         vst1.8 {d0, d1, d2}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0
@@ -223,7 +256,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1_u8_x4(ptr %a, %struct.uint8x8x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x4:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0
@@ -236,7 +269,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u16_x2(ptr %a, %struct.uint16x8x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u16_x2:
-; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0
@@ -247,8 +280,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u16_x3(ptr %a, %struct.uint16x8x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u16_x3:
-; CHECK:         vst1.16 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.16 {d3, d4, d5}, [r0:64]
+; CHECK:         vst1.16 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.16 {d3, d4, d5}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x3_t %b, 0, 0
@@ -260,8 +293,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u16_x4(ptr %a, %struct.uint16x8x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u16_x4:
-; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.16 {d4, d5, d6, d7}, [r0:256]
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.16 {d4, d5, d6, d7}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x4_t %b, 0, 0
@@ -274,7 +307,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u32_x2(ptr %a, %struct.uint32x4x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u32_x2:
-; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0
@@ -285,8 +318,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u32_x3(ptr %a, %struct.uint32x4x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u32_x3:
-; CHECK:         vst1.32 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.32 {d3, d4, d5}, [r0:64]
+; CHECK:         vst1.32 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.32 {d3, d4, d5}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x3_t %b, 0, 0
@@ -298,8 +331,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u32_x4(ptr %a, %struct.uint32x4x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u32_x4:
-; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.32 {d4, d5, d6, d7}, [r0:256]
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.32 {d4, d5, d6, d7}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x4_t %b, 0, 0
@@ -312,7 +345,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u64_x2(ptr %a, %struct.uint64x2x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u64_x2:
-; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0
@@ -323,8 +356,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u64_x3(ptr %a, %struct.uint64x2x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u64_x3:
-; CHECK:         vst1.64 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.64 {d3, d4, d5}, [r0:64]
+; CHECK:         vst1.64 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.64 {d3, d4, d5}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x3_t %b, 0, 0
@@ -336,8 +369,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u64_x4(ptr %a, %struct.uint64x2x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u64_x4:
-; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.64 {d4, d5, d6, d7}, [r0:256]
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.64 {d4, d5, d6, d7}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x4_t %b, 0, 0
@@ -350,7 +383,7 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u8_x2(ptr %a, %struct.uint8x16x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u8_x2:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0
@@ -361,8 +394,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u8_x3(ptr %a, %struct.uint8x16x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u8_x3:
-; CHECK:         vst1.8 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.8 {d3, d4, d5}, [r0:64]
+; CHECK:         vst1.8 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.8 {d3, d4, d5}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x3_t %b, 0, 0
@@ -374,8 +407,8 @@ entry:
 
 define arm_aapcs_vfpcc void @test_vst1q_u8_x4(ptr %a, %struct.uint8x16x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u8_x4:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.8 {d4, d5, d6, d7}, [r0:256]
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.8 {d4, d5, d6, d7}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x4_t %b, 0, 0
@@ -390,7 +423,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u8_x2_post_imm(ptr %a, %struct.uint8x8x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x2_post_imm:
-; CHECK:         vst1.8 {d0, d1}, [r0:64]!
+; CHECK:         vst1.8 {d0, d1}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0
@@ -402,7 +435,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u8_x2_post_reg(ptr %a, %struct.uint8x8x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x2_post_reg:
-; CHECK:         vst1.8 {d0, d1}, [r0:64], r1
+; CHECK:         vst1.8 {d0, d1}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0
@@ -414,7 +447,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_imm(ptr %a, %struct.uint16x4x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x2_post_imm:
-; CHECK:         vst1.16 {d0, d1}, [r0:64]!
+; CHECK:         vst1.16 {d0, d1}, [r0]!
 ; CHECK-NEXT:    bx lr
   %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
@@ -426,7 +459,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_imm(ptr %a, %struct.uint16x4x2
 define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_reg(ptr %a, %struct.uint16x4x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x2_post_reg:
 ; CHECK:         lsl r1, r1, #1
-; CHECK-NEXT:    vst1.16 {d0, d1}, [r0:64], r1
+; CHECK-NEXT:    vst1.16 {d0, d1}, [r0], r1
 ; CHECK-NEXT:    bx lr
   %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
@@ -437,7 +470,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x2_post_reg(ptr %a, %struct.uint16x4x2
 
 define arm_aapcs_vfpcc ptr @test_vst1_u32_x2_post_imm(ptr %a, %struct.uint32x2x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x2_post_imm:
-; CHECK:         vst1.32 {d0, d1}, [r0:64]!
+; CHECK:         vst1.32 {d0, d1}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0
@@ -450,7 +483,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1_u32_x2_post_reg(ptr %a, %struct.uint32x2x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x2_post_reg:
 ; CHECK:         lsl r1, r1, #2
-; CHECK-NEXT:    vst1.32 {d0, d1}, [r0:64], r1
+; CHECK-NEXT:    vst1.32 {d0, d1}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0
@@ -462,7 +495,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u64_x2_post_imm(ptr %a, %struct.uint64x1x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x2_post_imm:
-; CHECK:         vst1.64 {d0, d1}, [r0:64]!
+; CHECK:         vst1.64 {d0, d1}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0
@@ -475,7 +508,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1_u64_x2_post_reg(ptr %a, %struct.uint64x1x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x2_post_reg:
 ; CHECK:         lsl r1, r1, #3
-; CHECK-NEXT:    vst1.64 {d0, d1}, [r0:64], r1
+; CHECK-NEXT:    vst1.64 {d0, d1}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0
@@ -487,7 +520,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u8_x2_post_imm(ptr %a, %struct.uint8x16x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u8_x2_post_imm:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0
@@ -499,7 +532,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u8_x2_post_reg(ptr %a, %struct.uint8x16x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1q_u8_x2_post_reg:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0
@@ -511,7 +544,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u16_x2_post_imm(ptr %a, %struct.uint16x8x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u16_x2_post_imm:
-; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0
@@ -524,7 +557,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1q_u16_x2_post_reg(ptr %a, %struct.uint16x8x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1q_u16_x2_post_reg:
 ; CHECK:         lsl r1, r1, #1
-; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0
@@ -536,7 +569,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u32_x2_post_imm(ptr %a, %struct.uint32x4x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u32_x2_post_imm:
-; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0
@@ -549,7 +582,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1q_u32_x2_post_reg(ptr %a, %struct.uint32x4x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1q_u32_x2_post_reg:
 ; CHECK:         lsl r1, r1, #2
-; CHECK-NEXT:    vst1.32 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK-NEXT:    vst1.32 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0
@@ -561,7 +594,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u64_x2_post_imm(ptr %a, %struct.uint64x2x2_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u64_x2_post_imm:
-; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0
@@ -574,7 +607,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1q_u64_x2_post_reg(ptr %a, %struct.uint64x2x2_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1q_u64_x2_post_reg:
 ; CHECK:         lsl r1, r1, #3
-; CHECK-NEXT:    vst1.64 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK-NEXT:    vst1.64 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0
@@ -587,7 +620,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u8_x3_post_imm(ptr %a, %struct.uint8x8x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x3_post_imm:
-; CHECK:         vst1.8 {d0, d1, d2}, [r0:64]!
+; CHECK:         vst1.8 {d0, d1, d2}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0
@@ -600,7 +633,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u8_x3_post_reg(ptr %a, %struct.uint8x8x3_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x3_post_reg:
-; CHECK:         vst1.8 {d0, d1, d2}, [r0:64], r1
+; CHECK:         vst1.8 {d0, d1, d2}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0
@@ -613,7 +646,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_imm(ptr %a, %struct.uint16x4x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x3_post_imm:
-; CHECK:         vst1.16 {d0, d1, d2}, [r0:64]!
+; CHECK:         vst1.16 {d0, d1, d2}, [r0]!
 ; CHECK-NEXT:    bx lr
   %b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x3_t %b, 0, 1
@@ -626,7 +659,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_imm(ptr %a, %struct.uint16x4x3
 define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_reg(ptr %a, %struct.uint16x4x3_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x3_post_reg:
 ; CHECK:         lsl r1, r1, #1
-; CHECK-NEXT:    vst1.16 {d0, d1, d2}, [r0:64], r1
+; CHECK-NEXT:    vst1.16 {d0, d1, d2}, [r0], r1
 ; CHECK-NEXT:    bx lr
   %b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x3_t %b, 0, 1
@@ -638,7 +671,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x3_post_reg(ptr %a, %struct.uint16x4x3
 
 define arm_aapcs_vfpcc ptr @test_vst1_u32_x3_post_imm(ptr %a, %struct.uint32x2x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x3_post_imm:
-; CHECK:         vst1.32 {d0, d1, d2}, [r0:64]!
+; CHECK:         vst1.32 {d0, d1, d2}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0
@@ -652,7 +685,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1_u32_x3_post_reg(ptr %a, %struct.uint32x2x3_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x3_post_reg:
 ; CHECK:         lsl r1, r1, #2
-; CHECK-NEXT:    vst1.32 {d0, d1, d2}, [r0:64], r1
+; CHECK-NEXT:    vst1.32 {d0, d1, d2}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0
@@ -665,7 +698,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u64_x3_post_imm(ptr %a, %struct.uint64x1x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x3_post_imm:
-; CHECK:         vst1.64 {d0, d1, d2}, [r0:64]!
+; CHECK:         vst1.64 {d0, d1, d2}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0
@@ -679,7 +712,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1_u64_x3_post_reg(ptr %a, %struct.uint64x1x3_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x3_post_reg:
 ; CHECK:         lsl r1, r1, #3
-; CHECK-NEXT:    vst1.64 {d0, d1, d2}, [r0:64], r1
+; CHECK-NEXT:    vst1.64 {d0, d1, d2}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0
@@ -692,8 +725,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u8_x3_post_imm(ptr %a, %struct.uint8x16x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u8_x3_post_imm:
-; CHECK:         vst1.8 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.8 {d3, d4, d5}, [r0:64]!
+; CHECK:         vst1.8 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.8 {d3, d4, d5}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x3_t %b, 0, 0
@@ -706,8 +739,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u16_x3_post_imm(ptr %a, %struct.uint16x8x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u16_x3_post_imm:
-; CHECK:         vst1.16 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.16 {d3, d4, d5}, [r0:64]!
+; CHECK:         vst1.16 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.16 {d3, d4, d5}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x3_t %b, 0, 0
@@ -720,8 +753,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u32_x3_post_imm(ptr %a, %struct.uint32x4x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u32_x3_post_imm:
-; CHECK:         vst1.32 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.32 {d3, d4, d5}, [r0:64]!
+; CHECK:         vst1.32 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.32 {d3, d4, d5}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x3_t %b, 0, 0
@@ -734,8 +767,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u64_x3_post_imm(ptr %a, %struct.uint64x2x3_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u64_x3_post_imm:
-; CHECK:         vst1.64 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.64 {d3, d4, d5}, [r0:64]!
+; CHECK:         vst1.64 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.64 {d3, d4, d5}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x3_t %b, 0, 0
@@ -748,7 +781,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u8_x4_post_imm(ptr %a, %struct.uint8x8x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x4_post_imm:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0
@@ -762,7 +795,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u8_x4_post_reg(ptr %a, %struct.uint8x8x4_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u8_x4_post_reg:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0
@@ -776,7 +809,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_imm(ptr %a, %struct.uint16x4x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x4_post_imm:
-; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
   %b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x4_t %b, 0, 1
@@ -790,7 +823,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_imm(ptr %a, %struct.uint16x4x4
 define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_reg(ptr %a, %struct.uint16x4x4_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u16_x4_post_reg:
 ; CHECK:         lsl r1, r1, #1
-; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
   %b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x4_t %b, 0, 1
@@ -803,7 +836,7 @@ define arm_aapcs_vfpcc ptr @test_vst1_u16_x4_post_reg(ptr %a, %struct.uint16x4x4
 
 define arm_aapcs_vfpcc ptr @test_vst1_u32_x4_post_imm(ptr %a, %struct.uint32x2x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x4_post_imm:
-; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0
@@ -818,7 +851,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1_u32_x4_post_reg(ptr %a, %struct.uint32x2x4_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u32_x4_post_reg:
 ; CHECK:         lsl r1, r1, #2
-; CHECK-NEXT:    vst1.32 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK-NEXT:    vst1.32 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0
@@ -832,7 +865,7 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1_u64_x4_post_imm(ptr %a, %struct.uint64x1x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x4_post_imm:
-; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]!
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0
@@ -847,7 +880,7 @@ entry:
 define arm_aapcs_vfpcc ptr @test_vst1_u64_x4_post_reg(ptr %a, %struct.uint64x1x4_t %b, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vst1_u64_x4_post_reg:
 ; CHECK:         lsl r1, r1, #3
-; CHECK-NEXT:    vst1.64 {d0, d1, d2, d3}, [r0:256], r1
+; CHECK-NEXT:    vst1.64 {d0, d1, d2, d3}, [r0], r1
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0
@@ -861,8 +894,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u8_x4_post_imm(ptr %a, %struct.uint8x16x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u8_x4_post_imm:
-; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.8 {d4, d5, d6, d7}, [r0:256]!
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.8 {d4, d5, d6, d7}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x4_t %b, 0, 0
@@ -876,8 +909,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u16_x4_post_imm(ptr %a, %struct.uint16x8x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u16_x4_post_imm:
-; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.16 {d4, d5, d6, d7}, [r0:256]!
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.16 {d4, d5, d6, d7}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x4_t %b, 0, 0
@@ -891,8 +924,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u32_x4_post_imm(ptr %a, %struct.uint32x4x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u32_x4_post_imm:
-; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.32 {d4, d5, d6, d7}, [r0:256]!
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.32 {d4, d5, d6, d7}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x4_t %b, 0, 0
@@ -906,8 +939,8 @@ entry:
 
 define arm_aapcs_vfpcc ptr @test_vst1q_u64_x4_post_imm(ptr %a, %struct.uint64x2x4_t %b) nounwind {
 ; CHECK-LABEL: test_vst1q_u64_x4_post_imm:
-; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.64 {d4, d5, d6, d7}, [r0:256]!
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.64 {d4, d5, d6, d7}, [r0]!
 ; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x4_t %b, 0, 0
diff --git a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll
index e49128f53b1157..846cf239e8987e 100644
--- a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll
+++ b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll
@@ -60,7 +60,7 @@ entry:
 define arm_aapcs_vfpcc [2 x <2 x i32>] @test_vld1_bf16_x2(ptr %ptr) {
 ; CHECK-LABEL: test_vld1_bf16_x2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vld1.16 {d0, d1}, [r0:64]
+; CHECK-NEXT:    vld1.16 {d0, d1}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %vld1xN = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x2.v4bf16.p0(ptr %ptr)
@@ -76,7 +76,7 @@ entry:
 define arm_aapcs_vfpcc [2 x <4 x i32>] @test_vld1q_bf16_x2(ptr %ptr) {
 ; CHECK-LABEL: test_vld1q_bf16_x2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vld1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    vld1.16 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %vld1xN = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x2.v8bf16.p0(ptr %ptr)
@@ -92,7 +92,7 @@ entry:
 define arm_aapcs_vfpcc [3 x <2 x i32>] @test_vld1_bf16_x3(ptr %ptr) {
 ; CHECK-LABEL: test_vld1_bf16_x3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vld1.16 {d0, d1, d2}, [r0:64]
+; CHECK-NEXT:    vld1.16 {d0, d1, d2}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x3.v4bf16.p0(ptr %ptr)
@@ -111,8 +111,8 @@ entry:
 define arm_aapcs_vfpcc [3 x <4 x i32>] @test_vld1q_bf16_x3(ptr %ptr) {
 ; CHECK-LABEL: test_vld1q_bf16_x3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vld1.16 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vld1.16 {d3, d4, d5}, [r0:64]
+; CHECK-NEXT:    vld1.16 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vld1.16 {d3, d4, d5}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x3.v8bf16.p0(ptr %ptr)
@@ -131,7 +131,7 @@ entry:
 define arm_aapcs_vfpcc [4 x <2 x i32>] @test_vld1_bf16_x4(ptr %ptr) {
 ; CHECK-LABEL: test_vld1_bf16_x4:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vld1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    vld1.16 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x4.v4bf16.p0(ptr %ptr)
@@ -153,8 +153,8 @@ entry:
 define arm_aapcs_vfpcc [4 x <4 x i32>] @test_vld1q_bf16_x4(ptr %ptr) {
 ; CHECK-LABEL: test_vld1q_bf16_x4:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vld1.16 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vld1.16 {d4, d5, d6, d7}, [r0:256]
+; CHECK-NEXT:    vld1.16 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vld1.16 {d4, d5, d6, d7}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x4.v8bf16.p0(ptr %ptr)
@@ -635,7 +635,7 @@ define arm_aapcs_vfpcc void @test_vst1_bf16_x2(ptr nocapture %ptr, [2 x <2 x i32
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0 def $q0
 ; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0 def $q0
-; CHECK-NEXT:    vst1.16 {d0, d1}, [r0:64]
+; CHECK-NEXT:    vst1.16 {d0, d1}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %val.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %val.coerce, 0
@@ -651,7 +651,7 @@ define arm_aapcs_vfpcc void @test_vst1q_bf16_x2(ptr nocapture %ptr, [2 x <4 x i3
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %val.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %val.coerce, 0
@@ -668,7 +668,7 @@ define arm_aapcs_vfpcc void @test_vst1_bf16_x3(ptr nocapture %ptr, [3 x <2 x i32
 ; CHECK-NEXT:    @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    vst1.16 {d0, d1, d2}, [r0:64]
+; CHECK-NEXT:    vst1.16 {d0, d1, d2}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %val.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %val.coerce, 0
@@ -687,8 +687,8 @@ define arm_aapcs_vfpcc void @test_vst1q_bf16_x3(ptr nocapture %ptr, [3 x <4 x i3
 ; CHECK-NEXT:    @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT:    vst1.16 {d0, d1, d2}, [r0:64]!
-; CHECK-NEXT:    vst1.16 {d3, d4, d5}, [r0:64]
+; CHECK-NEXT:    vst1.16 {d0, d1, d2}, [r0]!
+; CHECK-NEXT:    vst1.16 {d3, d4, d5}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %val.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %val.coerce, 0
@@ -708,7 +708,7 @@ define arm_aapcs_vfpcc void @test_vst1_bf16_x4(ptr nocapture %ptr, [4 x <2 x i32
 ; CHECK-NEXT:    @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %val.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %val.coerce, 0
@@ -730,8 +730,8 @@ define arm_aapcs_vfpcc void @test_vst1q_bf16_x4(ptr nocapture %ptr, [4 x <4 x i3
 ; CHECK-NEXT:    @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0:256]!
-; CHECK-NEXT:    vst1.16 {d4, d5, d6, d7}, [r0:256]
+; CHECK-NEXT:    vst1.16 {d0, d1, d2, d3}, [r0]!
+; CHECK-NEXT:    vst1.16 {d4, d5, d6, d7}, [r0]
 ; CHECK-NEXT:    bx lr
 entry:
   %val.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %val.coerce, 0
diff --git a/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll
index 68dda24b537471..b0435f2a276f1f 100644
--- a/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll
@@ -29,7 +29,7 @@ define void @test() {
 define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(ptr align 16 %a) {
 ; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(
 ; CHECK-SAME: ptr align 16 [[A:%.*]]) {
-; CHECK-NEXT:    [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr [[A]])
+; CHECK-NEXT:    [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]])
 ; CHECK-NEXT:    ret { <4 x i16>, <4 x i16> } [[TMP]]
 ;
   %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr %a)
@@ -39,7 +39,7 @@ define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(ptr align 16 %a) {
 define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align(ptr align 16 %a) {
 ; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align(
 ; CHECK-SAME: ptr align 16 [[A:%.*]]) {
-; CHECK-NEXT:    [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 [[A]])
+; CHECK-NEXT:    [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]])
 ; CHECK-NEXT:    ret { <4 x i16>, <4 x i16> } [[TMP]]
 ;
   %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 %a)
@@ -59,7 +59,7 @@ define { <4 x i16>, <4 x i16> } @test_vld1x2_higher_align(ptr align 8 %a) {
 define void @test_vst1x2_no_align(ptr align 16 %a, <4 x i16> %b0, <4 x i16> %b1) {
 ; CHECK-LABEL: define void @test_vst1x2_no_align(
 ; CHECK-SAME: ptr align 16 [[A:%.*]], <4 x i16> [[B0:%.*]], <4 x i16> [[B1:%.*]]) {
-; CHECK-NEXT:    call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr [[A]], <4 x i16> [[B0]], <4 x i16> [[B1]])
+; CHECK-NEXT:    call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 16 [[A]], <4 x i16> [[B0]], <4 x i16> [[B1]])
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> %b0, <4 x i16> %b1)



More information about the llvm-commits mailing list