[llvm] 224112f - [ARM] Regenerate test checks (NFC)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 2 05:15:16 PDT 2024


Author: Nikita Popov
Date: 2024-09-02T14:15:03+02:00
New Revision: 224112f8334f944ecb354bff8d57d85274037b85

URL: https://github.com/llvm/llvm-project/commit/224112f8334f944ecb354bff8d57d85274037b85
DIFF: https://github.com/llvm/llvm-project/commit/224112f8334f944ecb354bff8d57d85274037b85.diff

LOG: [ARM] Regenerate test checks (NFC)

Added: 
    

Modified: 
    llvm/test/CodeGen/ARM/arm-vld1.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/ARM/arm-vld1.ll b/llvm/test/CodeGen/ARM/arm-vld1.ll
index 1ea48e908abbda..78b0b92013c397 100644
--- a/llvm/test/CodeGen/ARM/arm-vld1.ll
+++ b/llvm/test/CodeGen/ARM/arm-vld1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
 ; RUN:     -asm-verbose=false | FileCheck %s
 
@@ -65,178 +66,274 @@ declare %struct.uint8x16x2_t @llvm.arm.neon.vld1x2.v16i8.p0(ptr) nounwind readon
 declare %struct.uint8x16x3_t @llvm.arm.neon.vld1x3.v16i8.p0(ptr) nounwind readonly
 declare %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0(ptr) nounwind readonly
 
-; CHECK-LABEL: test_vld1_u16_x2
-; CHECK: vld1.16 {d16, d17}, [r0:64]
 define %struct.uint16x4x2_t @test_vld1_u16_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u16_x2:
+; CHECK:         vld1.16 {d16, d17}, [r0:64]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr %a)
   ret %struct.uint16x4x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u16_x3
-; CHECK: vld1.16 {d16, d17, d18}, [r1:64]
 define %struct.uint16x4x3_t @test_vld1_u16_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u16_x3:
+; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]
+; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld1x3.v4i16.p0(ptr %a)
   ret %struct.uint16x4x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u16_x4
-; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint16x4x4_t @test_vld1_u16_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u16_x4:
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld1x4.v4i16.p0(ptr %a)
   ret %struct.uint16x4x4_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u32_x2
-; CHECK: vld1.32 {d16, d17}, [r0:64]
 define %struct.uint32x2x2_t @test_vld1_u32_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u32_x2:
+; CHECK:         vld1.32 {d16, d17}, [r0:64]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld1x2.v2i32.p0(ptr %a)
   ret %struct.uint32x2x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u32_x3
-; CHECK: vld1.32 {d16, d17, d18}, [r1:64]
 define %struct.uint32x2x3_t @test_vld1_u32_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u32_x3:
+; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]
+; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld1x3.v2i32.p0(ptr %a)
   ret %struct.uint32x2x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u32_x4
-; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint32x2x4_t @test_vld1_u32_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u32_x4:
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld1x4.v2i32.p0(ptr %a)
   ret %struct.uint32x2x4_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u64_x2
-; CHECK: vld1.64 {d16, d17}, [r0:64]
 define %struct.uint64x1x2_t @test_vld1_u64_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u64_x2:
+; CHECK:         vld1.64 {d16, d17}, [r0:64]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld1x2.v1i64.p0(ptr %a)
   ret %struct.uint64x1x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u64_x3
-; CHECK: vld1.64 {d16, d17, d18}, [r1:64]
 define %struct.uint64x1x3_t @test_vld1_u64_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u64_x3:
+; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]
+; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld1x3.v1i64.p0(ptr %a)
   ret %struct.uint64x1x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u64_x4
-; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint64x1x4_t @test_vld1_u64_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u64_x4:
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld1x4.v1i64.p0(ptr %a)
   ret %struct.uint64x1x4_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u8_x2
-; CHECK: vld1.8 {d16, d17}, [r0:64]
 define %struct.uint8x8x2_t @test_vld1_u8_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u8_x2:
+; CHECK:         vld1.8 {d16, d17}, [r0:64]
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld1x2.v8i8.p0(ptr %a)
   ret %struct.uint8x8x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u8_x3
-; CHECK: vld1.8 {d16, d17, d18}, [r1:64]
 define %struct.uint8x8x3_t @test_vld1_u8_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u8_x3:
+; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]
+; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld1x3.v8i8.p0(ptr %a)
   ret %struct.uint8x8x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1_u8_x4
-; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint8x8x4_t @test_vld1_u8_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1_u8_x4:
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld1x4.v8i8.p0(ptr %a)
   ret %struct.uint8x8x4_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u16_x2
-; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint16x8x2_t @test_vld1q_u16_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u16_x2:
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld1x2.v8i16.p0(ptr %a)
   ret %struct.uint16x8x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u16_x3
-; CHECK: vld1.16 {d16, d17, d18}, [r1:64]!
-; CHECK: vld1.16 {d19, d20, d21}, [r1:64]
 define %struct.uint16x8x3_t @test_vld1q_u16_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u16_x3:
+; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    vld1.16 {d19, d20, d21}, [r1:64]
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld1x3.v8i16.p0(ptr %a)
   ret %struct.uint16x8x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u16_x4
-; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]!
-; CHECK: vld1.16 {d20, d21, d22, d23}, [r1:256]
 define %struct.uint16x8x4_t @test_vld1q_u16_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u16_x4:
+; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    vld1.16 {d20, d21, d22, d23}, [r1:256]
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.16 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld1x4.v8i16.p0(ptr %a)
   ret %struct.uint16x8x4_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u32_x2
-; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint32x4x2_t @test_vld1q_u32_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u32_x2:
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld1x2.v4i32.p0(ptr %a)
   ret %struct.uint32x4x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u32_x3
-; CHECK: vld1.32 {d16, d17, d18}, [r1:64]!
-; CHECK: vld1.32 {d19, d20, d21}, [r1:64]
 define %struct.uint32x4x3_t @test_vld1q_u32_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u32_x3:
+; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    vld1.32 {d19, d20, d21}, [r1:64]
+; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld1x3.v4i32.p0(ptr %a)
   ret %struct.uint32x4x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u32_x4
-; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]!
-; CHECK: vld1.32 {d20, d21, d22, d23}, [r1:256]
 define %struct.uint32x4x4_t @test_vld1q_u32_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u32_x4:
+; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    vld1.32 {d20, d21, d22, d23}, [r1:256]
+; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.32 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld1x4.v4i32.p0(ptr %a)
   ret %struct.uint32x4x4_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u64_x2
-; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint64x2x2_t @test_vld1q_u64_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u64_x2:
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint64x2x2_t @llvm.arm.neon.vld1x2.v2i64.p0(ptr %a)
   ret %struct.uint64x2x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u64_x3
-; CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
-; CHECK: vld1.64 {d19, d20, d21}, [r1:64]
 define %struct.uint64x2x3_t @test_vld1q_u64_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u64_x3:
+; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    vld1.64 {d19, d20, d21}, [r1:64]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint64x2x3_t @llvm.arm.neon.vld1x3.v2i64.p0(ptr %a)
   ret %struct.uint64x2x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u64_x4
-; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
-; CHECK: vld1.64 {d20, d21, d22, d23}, [r1:256]
 define %struct.uint64x2x4_t @test_vld1q_u64_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u64_x4:
+; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    vld1.64 {d20, d21, d22, d23}, [r1:256]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint64x2x4_t @llvm.arm.neon.vld1x4.v2i64.p0(ptr %a)
   ret %struct.uint64x2x4_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u8_x2
-; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]
 define %struct.uint8x16x2_t @test_vld1q_u8_x2(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u8_x2:
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld1x2.v16i8.p0(ptr %a)
   ret %struct.uint8x16x2_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u8_x3
-; CHECK: vld1.8 {d16, d17, d18}, [r1:64]!
-; CHECK: vld1.8 {d19, d20, d21}, [r1:64]
 define %struct.uint8x16x3_t @test_vld1q_u8_x3(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u8_x3:
+; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r1:64]
+; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld1x3.v16i8.p0(ptr %a)
   ret %struct.uint8x16x3_t %tmp
 }
 
-; CHECK-LABEL: test_vld1q_u8_x4
-; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]!
-; CHECK: vld1.8 {d20, d21, d22, d23}, [r1:256]
 define %struct.uint8x16x4_t @test_vld1q_u8_x4(ptr %a) nounwind {
+; CHECK-LABEL: test_vld1q_u8_x4:
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r1:256]
+; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.8 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0(ptr %a)
   ret %struct.uint8x16x4_t %tmp
 }
@@ -245,7 +342,15 @@ define %struct.uint8x16x4_t @test_vld1q_u8_x4(ptr %a) nounwind {
 
 define %struct.uint16x4x2_t @test_vld1_u16_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x2_post_imm:
-; CHECK:         vld1.16 {d16, d17}, [r0:64]!
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    vld1.16 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 8
   store ptr %tmp, ptr %ptr
@@ -254,8 +359,16 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_imm(ptr %a, ptr %ptr) nounwin
 
 define %struct.uint16x4x2_t @test_vld1_u16_x2_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x2_post_reg:
-; CHECK:         lsl r2, r2, #1
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    lsl r2, r2, #1
 ; CHECK-NEXT:    vld1.16 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -265,6 +378,11 @@ define %struct.uint16x4x2_t @test_vld1_u16_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 define %struct.uint16x4x3_t @test_vld1_u16_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x3_post_imm:
 ; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld1x3.v4i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 12
   store ptr %tmp, ptr %ptr
@@ -275,6 +393,11 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK-LABEL: test_vld1_u16_x3_post_reg:
 ; CHECK:         lsl r3, r3, #1
 ; CHECK-NEXT:    vld1.16 {d16, d17, d18}, [r1:64], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld1x3.v4i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -284,6 +407,12 @@ define %struct.uint16x4x3_t @test_vld1_u16_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 define %struct.uint16x4x4_t @test_vld1_u16_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u16_x4_post_imm:
 ; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld1x4.v4i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 16
   store ptr %tmp, ptr %ptr
@@ -294,6 +423,12 @@ define %struct.uint16x4x4_t @test_vld1_u16_x4_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK-LABEL: test_vld1_u16_x4_post_reg:
 ; CHECK:         lsl r3, r3, #1
 ; CHECK-NEXT:    vld1.16 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld1x4.v4i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -302,7 +437,15 @@ define %struct.uint16x4x4_t @test_vld1_u16_x4_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint32x2x2_t @test_vld1_u32_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x2_post_imm:
-; CHECK:         vld1.32 {d16, d17}, [r0:64]!
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    vld1.32 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld1x2.v2i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 4
   store ptr %tmp, ptr %ptr
@@ -311,8 +454,16 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_imm(ptr %a, ptr %ptr) nounwin
 
 define %struct.uint32x2x2_t @test_vld1_u32_x2_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x2_post_reg:
-; CHECK:         lsl r2, r2, #2
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    lsl r2, r2, #2
 ; CHECK-NEXT:    vld1.32 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld1x2.v2i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -322,6 +473,11 @@ define %struct.uint32x2x2_t @test_vld1_u32_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 define %struct.uint32x2x3_t @test_vld1_u32_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x3_post_imm:
 ; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld1x3.v2i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 6
   store ptr %tmp, ptr %ptr
@@ -332,6 +488,11 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK-LABEL: test_vld1_u32_x3_post_reg:
 ; CHECK:         lsl r3, r3, #2
 ; CHECK-NEXT:    vld1.32 {d16, d17, d18}, [r1:64], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld1x3.v2i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -341,6 +502,12 @@ define %struct.uint32x2x3_t @test_vld1_u32_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 define %struct.uint32x2x4_t @test_vld1_u32_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u32_x4_post_imm:
 ; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld1x4.v2i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 8
   store ptr %tmp, ptr %ptr
@@ -351,6 +518,12 @@ define %struct.uint32x2x4_t @test_vld1_u32_x4_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK-LABEL: test_vld1_u32_x4_post_reg:
 ; CHECK:         lsl r3, r3, #2
 ; CHECK-NEXT:    vld1.32 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld1x4.v2i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -359,7 +532,15 @@ define %struct.uint32x2x4_t @test_vld1_u32_x4_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint64x1x2_t @test_vld1_u64_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x2_post_imm:
-; CHECK:         vld1.64 {d16, d17}, [r0:64]!
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld1x2.v1i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 2
   store ptr %tmp, ptr %ptr
@@ -368,8 +549,16 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_imm(ptr %a, ptr %ptr) nounwin
 
 define %struct.uint64x1x2_t @test_vld1_u64_x2_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x2_post_reg:
-; CHECK:         lsl r2, r2, #3
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    lsl r2, r2, #3
 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld1x2.v1i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -379,6 +568,11 @@ define %struct.uint64x1x2_t @test_vld1_u64_x2_post_reg(ptr %a, ptr %ptr, i32 %in
 define %struct.uint64x1x3_t @test_vld1_u64_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x3_post_imm:
 ; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld1x3.v1i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 3
   store ptr %tmp, ptr %ptr
@@ -389,6 +583,11 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK-LABEL: test_vld1_u64_x3_post_reg:
 ; CHECK:         lsl r3, r3, #3
 ; CHECK-NEXT:    vld1.64 {d16, d17, d18}, [r1:64], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld1x3.v1i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -398,6 +597,12 @@ define %struct.uint64x1x3_t @test_vld1_u64_x3_post_reg(ptr %a, ptr %ptr, i32 %in
 define %struct.uint64x1x4_t @test_vld1_u64_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u64_x4_post_imm:
 ; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld1x4.v1i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 4
   store ptr %tmp, ptr %ptr
@@ -408,6 +613,12 @@ define %struct.uint64x1x4_t @test_vld1_u64_x4_post_reg(ptr %a, ptr %ptr, i32 %in
 ; CHECK-LABEL: test_vld1_u64_x4_post_reg:
 ; CHECK:         lsl r3, r3, #3
 ; CHECK-NEXT:    vld1.64 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld1x4.v1i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -416,7 +627,15 @@ define %struct.uint64x1x4_t @test_vld1_u64_x4_post_reg(ptr %a, ptr %ptr, i32 %in
 
 define %struct.uint8x8x2_t @test_vld1_u8_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x2_post_imm:
-; CHECK:         vld1.8 {d16, d17}, [r0:64]!
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    vld1.8 {d16, d17}, [r0:64]!
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld1x2.v8i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 16
   store ptr %tmp, ptr %ptr
@@ -425,7 +644,15 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_imm(ptr %a, ptr %ptr) nounwind
 
 define %struct.uint8x8x2_t @test_vld1_u8_x2_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x2_post_reg:
-; CHECK:         vld1.8 {d16, d17}, [r0:64], r2
+; CHECK:         .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    vld1.8 {d16, d17}, [r0:64], r2
+; CHECK-NEXT:    vmov lr, r12, d16
+; CHECK-NEXT:    str r0, [r1]
+; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    pop {r11, pc}
   %ld = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld1x2.v8i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -435,6 +662,11 @@ define %struct.uint8x8x2_t @test_vld1_u8_x2_post_reg(ptr %a, ptr %ptr, i32 %inc)
 define %struct.uint8x8x3_t @test_vld1_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x3_post_imm:
 ; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld1x3.v8i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 24
   store ptr %tmp, ptr %ptr
@@ -444,6 +676,11 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3_post_imm(ptr %a, ptr %ptr) nounwind
 define %struct.uint8x8x3_t @test_vld1_u8_x3_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x3_post_reg:
 ; CHECK:         vld1.8 {d16, d17, d18}, [r1:64], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
+; CHECK-NEXT:    vstr d18, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld1x3.v8i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -453,6 +690,12 @@ define %struct.uint8x8x3_t @test_vld1_u8_x3_post_reg(ptr %a, ptr %ptr, i32 %inc)
 define %struct.uint8x8x4_t @test_vld1_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x4_post_imm:
 ; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld1x4.v8i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 32
   store ptr %tmp, ptr %ptr
@@ -462,6 +705,12 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4_post_imm(ptr %a, ptr %ptr) nounwind
 define %struct.uint8x8x4_t @test_vld1_u8_x4_post_reg(ptr %a, ptr %ptr, i32 %inc) nounwind {
 ; CHECK-LABEL: test_vld1_u8_x4_post_reg:
 ; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256], r3
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d18}, [r0:64]!
+; CHECK-NEXT:    vstr d19, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld1x4.v8i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 %inc
   store ptr %tmp, ptr %ptr
@@ -471,6 +720,10 @@ define %struct.uint8x8x4_t @test_vld1_u8_x4_post_reg(ptr %a, ptr %ptr, i32 %inc)
 define %struct.uint16x8x2_t @test_vld1q_u16_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u16_x2_post_imm:
 ; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld1x2.v8i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 16
   store ptr %tmp, ptr %ptr
@@ -481,6 +734,11 @@ define %struct.uint16x8x3_t @test_vld1q_u16_x3_post_imm(ptr %a, ptr %ptr) nounwi
 ; CHECK-LABEL: test_vld1q_u16_x3_post_imm:
 ; CHECK:         vld1.16 {d16, d17, d18}, [r1:64]!
 ; CHECK-NEXT:    vld1.16 {d19, d20, d21}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld1x3.v8i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 24
   store ptr %tmp, ptr %ptr
@@ -491,6 +749,12 @@ define %struct.uint16x8x4_t @test_vld1q_u16_x4_post_imm(ptr %a, ptr %ptr) nounwi
 ; CHECK-LABEL: test_vld1q_u16_x4_post_imm:
 ; CHECK:         vld1.16 {d16, d17, d18, d19}, [r1:256]!
 ; CHECK-NEXT:    vld1.16 {d20, d21, d22, d23}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.16 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld1x4.v8i16.p0(ptr %a)
   %tmp = getelementptr i16, ptr %a, i32 32
   store ptr %tmp, ptr %ptr
@@ -500,6 +764,10 @@ define %struct.uint16x8x4_t @test_vld1q_u16_x4_post_imm(ptr %a, ptr %ptr) nounwi
 define %struct.uint32x4x2_t @test_vld1q_u32_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u32_x2_post_imm:
 ; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld1x2.v4i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 8
   store ptr %tmp, ptr %ptr
@@ -510,6 +778,11 @@ define %struct.uint32x4x3_t @test_vld1q_u32_x3_post_imm(ptr %a, ptr %ptr) nounwi
 ; CHECK-LABEL: test_vld1q_u32_x3_post_imm:
 ; CHECK:         vld1.32 {d16, d17, d18}, [r1:64]!
 ; CHECK-NEXT:    vld1.32 {d19, d20, d21}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld1x3.v4i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 12
   store ptr %tmp, ptr %ptr
@@ -520,6 +793,12 @@ define %struct.uint32x4x4_t @test_vld1q_u32_x4_post_imm(ptr %a, ptr %ptr) nounwi
 ; CHECK-LABEL: test_vld1q_u32_x4_post_imm:
 ; CHECK:         vld1.32 {d16, d17, d18, d19}, [r1:256]!
 ; CHECK-NEXT:    vld1.32 {d20, d21, d22, d23}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.32 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld1x4.v4i32.p0(ptr %a)
   %tmp = getelementptr i32, ptr %a, i32 16
   store ptr %tmp, ptr %ptr
@@ -529,6 +808,10 @@ define %struct.uint32x4x4_t @test_vld1q_u32_x4_post_imm(ptr %a, ptr %ptr) nounwi
 define %struct.uint64x2x2_t @test_vld1q_u64_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u64_x2_post_imm:
 ; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint64x2x2_t @llvm.arm.neon.vld1x2.v2i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 4
   store ptr %tmp, ptr %ptr
@@ -539,6 +822,11 @@ define %struct.uint64x2x3_t @test_vld1q_u64_x3_post_imm(ptr %a, ptr %ptr) nounwi
 ; CHECK-LABEL: test_vld1q_u64_x3_post_imm:
 ; CHECK:         vld1.64 {d16, d17, d18}, [r1:64]!
 ; CHECK-NEXT:    vld1.64 {d19, d20, d21}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint64x2x3_t @llvm.arm.neon.vld1x3.v2i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 6
   store ptr %tmp, ptr %ptr
@@ -549,6 +837,12 @@ define %struct.uint64x2x4_t @test_vld1q_u64_x4_post_imm(ptr %a, ptr %ptr) nounwi
 ; CHECK-LABEL: test_vld1q_u64_x4_post_imm:
 ; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:256]!
 ; CHECK-NEXT:    vld1.64 {d20, d21, d22, d23}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint64x2x4_t @llvm.arm.neon.vld1x4.v2i64.p0(ptr %a)
   %tmp = getelementptr i64, ptr %a, i32 8
   store ptr %tmp, ptr %ptr
@@ -558,6 +852,10 @@ define %struct.uint64x2x4_t @test_vld1q_u64_x4_post_imm(ptr %a, ptr %ptr) nounwi
 define %struct.uint8x16x2_t @test_vld1q_u8_x2_post_imm(ptr %a, ptr %ptr) nounwind {
 ; CHECK-LABEL: test_vld1q_u8_x2_post_imm:
 ; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld1x2.v16i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 32
   store ptr %tmp, ptr %ptr
@@ -568,6 +866,11 @@ define %struct.uint8x16x3_t @test_vld1q_u8_x3_post_imm(ptr %a, ptr %ptr) nounwin
 ; CHECK-LABEL: test_vld1q_u8_x3_post_imm:
 ; CHECK:         vld1.8 {d16, d17, d18}, [r1:64]!
 ; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r1:64]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld1x3.v16i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 48
   store ptr %tmp, ptr %ptr
@@ -578,6 +881,12 @@ define %struct.uint8x16x4_t @test_vld1q_u8_x4_post_imm(ptr %a, ptr %ptr) nounwin
 ; CHECK-LABEL: test_vld1q_u8_x4_post_imm:
 ; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]!
 ; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r1:256]!
+; CHECK-NEXT:    str r1, [r2]
+; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
+; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
+; CHECK-NEXT:    vst1.8 {d20, d21}, [r0]!
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
+; CHECK-NEXT:    bx lr
   %ld = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0(ptr %a)
   %tmp = getelementptr i8, ptr %a, i32 64
   store ptr %tmp, ptr %ptr


        


More information about the llvm-commits mailing list