[llvm] 7678603 - [ARM] Fix postinc of vst1xN

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun May 9 13:58:07 PDT 2021


Author: David Green
Date: 2021-05-09T21:57:55+01:00
New Revision: 76786037c68163c48d7d829bb654de6c8298bbb0

URL: https://github.com/llvm/llvm-project/commit/76786037c68163c48d7d829bb654de6c8298bbb0
DIFF: https://github.com/llvm/llvm-project/commit/76786037c68163c48d7d829bb654de6c8298bbb0.diff

LOG: [ARM] Fix postinc of vst1xN

The vst1x2, vst1x3 and vst1x4 intrinsic nodes are not handled correctly by
CombineBaseUpdate. For the moment, similar to 5f1cad4d296a20025f0b, mark
them as unsupported.

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/ARM/arm-vst1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f1f3d4c6e895c..cae4b9babecb9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14641,6 +14641,9 @@ static SDValue CombineBaseUpdate(SDNode *N,
       case Intrinsic::arm_neon_vld1x2:
       case Intrinsic::arm_neon_vld1x3:
       case Intrinsic::arm_neon_vld1x4:
+      case Intrinsic::arm_neon_vst1x2:
+      case Intrinsic::arm_neon_vst1x3:
+      case Intrinsic::arm_neon_vst1x4:
       case Intrinsic::arm_neon_vld2dup:
       case Intrinsic::arm_neon_vld3dup:
       case Intrinsic::arm_neon_vld4dup:

diff  --git a/llvm/test/CodeGen/ARM/arm-vst1.ll b/llvm/test/CodeGen/ARM/arm-vst1.ll
index 3e8f6d76c3195..6c9c07cb7c4f6 100644
--- a/llvm/test/CodeGen/ARM/arm-vst1.ll
+++ b/llvm/test/CodeGen/ARM/arm-vst1.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
-; RUN:     -asm-verbose=false | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs -asm-verbose=false | FileCheck %s
 
 ; %struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
 ; %struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -90,9 +90,10 @@ declare void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8
 declare void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>, <16 x i8>) argmemonly nounwind
 declare void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) argmemonly nounwind
 
-; CHECK-LABEL: test_vst1_u16_x2
-; CHECK: vst1.16 {d16, d17}, [r0:64]
-define void @test_vst1_u16_x2(i16* %a, %struct.uint16x4x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u16_x2(i16* %a, %struct.uint16x4x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u16_x2:
+; CHECK:         vst1.16 {d0, d1}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
@@ -100,9 +101,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u16_x3
-; CHECK: vst1.16 {d16, d17, d18}, [r0:64]
-define void @test_vst1_u16_x3(i16* %a, %struct.uint16x4x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u16_x3(i16* %a, %struct.uint16x4x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u16_x3:
+; CHECK:         vst1.16 {d0, d1, d2}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x4x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x3_t %b, 0, 1
@@ -111,9 +113,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u16_x4
-; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1_u16_x4(i16* %a, %struct.uint16x4x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u16_x4(i16* %a, %struct.uint16x4x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u16_x4:
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x4x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x4x4_t %b, 0, 1
@@ -123,9 +126,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u32_x2
-; CHECK: vst1.32 {d16, d17}, [r0:64]
-define void @test_vst1_u32_x2(i32* %a, %struct.uint32x2x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u32_x2(i32* %a, %struct.uint32x2x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u32_x2:
+; CHECK:         vst1.32 {d0, d1}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint32x2x2_t %b, 0, 1
@@ -133,9 +137,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u32_x3
-; CHECK: vst1.32 {d16, d17, d18}, [r0:64]
-define void @test_vst1_u32_x3(i32* %a, %struct.uint32x2x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u32_x3(i32* %a, %struct.uint32x2x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u32_x3:
+; CHECK:         vst1.32 {d0, d1, d2}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint32x2x3_t %b, 0, 1
@@ -144,9 +149,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u32_x4
-; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1_u32_x4(i32* %a, %struct.uint32x2x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u32_x4(i32* %a, %struct.uint32x2x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u32_x4:
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x2x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint32x2x4_t %b, 0, 1
@@ -156,9 +162,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u64_x2
-; CHECK: vst1.64 {d16, d17}, [r0:64]
-define void @test_vst1_u64_x2(i64* %a, %struct.uint64x1x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u64_x2(i64* %a, %struct.uint64x1x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u64_x2:
+; CHECK:         vst1.64 {d0, d1}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint64x1x2_t %b, 0, 1
@@ -166,9 +173,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u64_x3
-; CHECK: vst1.64 {d16, d17, d18}, [r0:64]
-define void @test_vst1_u64_x3(i64* %a, %struct.uint64x1x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u64_x3(i64* %a, %struct.uint64x1x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u64_x3:
+; CHECK:         vst1.64 {d0, d1, d2}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint64x1x3_t %b, 0, 1
@@ -177,9 +185,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u64_x4
-; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1_u64_x4(i64* %a, %struct.uint64x1x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u64_x4(i64* %a, %struct.uint64x1x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u64_x4:
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x1x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint64x1x4_t %b, 0, 1
@@ -189,9 +198,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u8_x2
-; CHECK: vst1.8 {d16, d17}, [r0:64]
-define void @test_vst1_u8_x2(i8* %a, %struct.uint8x8x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u8_x2(i8* %a, %struct.uint8x8x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u8_x2:
+; CHECK:         vst1.8 {d0, d1}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint8x8x2_t %b, 0, 1
@@ -199,9 +209,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u8_x3
-; CHECK: vst1.8 {d16, d17, d18}, [r0:64]
-define void @test_vst1_u8_x3(i8* %a, %struct.uint8x8x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u8_x3(i8* %a, %struct.uint8x8x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u8_x3:
+; CHECK:         vst1.8 {d0, d1, d2}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint8x8x3_t %b, 0, 1
@@ -210,9 +221,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1_u8_x4
-; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1_u8_x4(i8* %a, %struct.uint8x8x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1_u8_x4(i8* %a, %struct.uint8x8x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1_u8_x4:
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x8x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint8x8x4_t %b, 0, 1
@@ -222,9 +234,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u16_x2
-; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1q_u16_x2(i16* %a, %struct.uint16x8x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u16_x2(i16* %a, %struct.uint16x8x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u16_x2:
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x8x2_t %b, 0, 1
@@ -232,10 +245,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u16_x3
-; CHECK: vst1.16 {d16, d17, d18}, [r0:64]!
-; CHECK: vst1.16 {d19, d20, d21}, [r0:64]
-define void @test_vst1q_u16_x3(i16* %a, %struct.uint16x8x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u16_x3(i16* %a, %struct.uint16x8x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u16_x3:
+; CHECK:         vst1.16 {d0, d1, d2}, [r0:64]!
+; CHECK-NEXT:    vst1.16 {d3, d4, d5}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x8x3_t %b, 0, 1
@@ -244,10 +258,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u16_x4
-; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]!
-; CHECK: vst1.16 {d20, d21, d22, d23}, [r0:256]
-define void @test_vst1q_u16_x4(i16* %a, %struct.uint16x8x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u16_x4(i16* %a, %struct.uint16x8x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u16_x4:
+; CHECK:         vst1.16 {d0, d1, d2, d3}, [r0:256]!
+; CHECK-NEXT:    vst1.16 {d4, d5, d6, d7}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint16x8x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint16x8x4_t %b, 0, 1
@@ -257,9 +272,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u32_x2
-; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1q_u32_x2(i32* %a, %struct.uint32x4x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u32_x2(i32* %a, %struct.uint32x4x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u32_x2:
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint32x4x2_t %b, 0, 1
@@ -267,10 +283,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u32_x3
-; CHECK: vst1.32 {d16, d17, d18}, [r0:64]!
-; CHECK: vst1.32 {d19, d20, d21}, [r0:64]
-define void @test_vst1q_u32_x3(i32* %a, %struct.uint32x4x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u32_x3(i32* %a, %struct.uint32x4x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u32_x3:
+; CHECK:         vst1.32 {d0, d1, d2}, [r0:64]!
+; CHECK-NEXT:    vst1.32 {d3, d4, d5}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint32x4x3_t %b, 0, 1
@@ -279,10 +296,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u32_x4
-; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]!
-; CHECK: vst1.32 {d20, d21, d22, d23}, [r0:256]
-define void @test_vst1q_u32_x4(i32* %a, %struct.uint32x4x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u32_x4(i32* %a, %struct.uint32x4x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u32_x4:
+; CHECK:         vst1.32 {d0, d1, d2, d3}, [r0:256]!
+; CHECK-NEXT:    vst1.32 {d4, d5, d6, d7}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint32x4x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint32x4x4_t %b, 0, 1
@@ -292,9 +310,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u64_x2
-; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1q_u64_x2(i64* %a, %struct.uint64x2x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u64_x2(i64* %a, %struct.uint64x2x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u64_x2:
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint64x2x2_t %b, 0, 1
@@ -302,10 +321,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u64_x3
-; CHECK: vst1.64 {d16, d17, d18}, [r0:64]!
-; CHECK: vst1.64 {d19, d20, d21}, [r0:64]
-define void @test_vst1q_u64_x3(i64* %a, %struct.uint64x2x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u64_x3(i64* %a, %struct.uint64x2x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u64_x3:
+; CHECK:         vst1.64 {d0, d1, d2}, [r0:64]!
+; CHECK-NEXT:    vst1.64 {d3, d4, d5}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint64x2x3_t %b, 0, 1
@@ -314,10 +334,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u64_x4
-; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]!
-; CHECK: vst1.64 {d20, d21, d22, d23}, [r0:256]
-define void @test_vst1q_u64_x4(i64* %a, %struct.uint64x2x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u64_x4(i64* %a, %struct.uint64x2x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u64_x4:
+; CHECK:         vst1.64 {d0, d1, d2, d3}, [r0:256]!
+; CHECK-NEXT:    vst1.64 {d4, d5, d6, d7}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint64x2x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint64x2x4_t %b, 0, 1
@@ -327,9 +348,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u8_x2
-; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]
-define void @test_vst1q_u8_x2(i8* %a, %struct.uint8x16x2_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u8_x2(i8* %a, %struct.uint8x16x2_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u8_x2:
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x2_t %b, 0, 0
   %b1 = extractvalue %struct.uint8x16x2_t %b, 0, 1
@@ -337,10 +359,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u8_x3
-; CHECK: vst1.8 {d16, d17, d18}, [r0:64]!
-; CHECK: vst1.8 {d19, d20, d21}, [r0:64]
-define void @test_vst1q_u8_x3(i8* %a, %struct.uint8x16x3_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u8_x3(i8* %a, %struct.uint8x16x3_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u8_x3:
+; CHECK:         vst1.8 {d0, d1, d2}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d3, d4, d5}, [r0:64]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x3_t %b, 0, 0
   %b1 = extractvalue %struct.uint8x16x3_t %b, 0, 1
@@ -349,10 +372,11 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: test_vst1q_u8_x4
-; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]!
-; CHECK: vst1.8 {d20, d21, d22, d23}, [r0:256]
-define void @test_vst1q_u8_x4(i8* %a, %struct.uint8x16x4_t %b) nounwind {
+define arm_aapcs_vfpcc void @test_vst1q_u8_x4(i8* %a, %struct.uint8x16x4_t %b) nounwind {
+; CHECK-LABEL: test_vst1q_u8_x4:
+; CHECK:         vst1.8 {d0, d1, d2, d3}, [r0:256]!
+; CHECK-NEXT:    vst1.8 {d4, d5, d6, d7}, [r0:256]
+; CHECK-NEXT:    bx lr
 entry:
   %b0 = extractvalue %struct.uint8x16x4_t %b, 0, 0
   %b1 = extractvalue %struct.uint8x16x4_t %b, 0, 1
@@ -361,3 +385,93 @@ entry:
   tail call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* %a, <16 x i8> %b0, <16 x i8> %b1, <16 x i8> %b2, <16 x i8> %b3)
   ret void
 }
+
+define void @postinc_1x2(i8* nocapture %0, i8* %1) {
+; CHECK-LABEL: postinc_1x2:
+; CHECK:         vld1.8 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    add r1, r1, #32
+; CHECK-NEXT:    vst1.8 {d16, d17, d18, d19}, [r0:256]
+; CHECK-NEXT:    add r0, r0, #32
+; CHECK-NEXT:    vld1.8 {d16, d17, d18, d19}, [r1:256]
+; CHECK-NEXT:    vst1.8 {d16, d17, d18, d19}, [r0:256]
+; CHECK-NEXT:    bx lr
+  %3 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x2.v16i8.p0i8(i8* %1)
+  %4 = extractvalue { <16 x i8>, <16 x i8> } %3, 0
+  %5 = extractvalue { <16 x i8>, <16 x i8> } %3, 1
+  tail call void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* %0, <16 x i8> %4, <16 x i8> %5)
+  %6 = getelementptr inbounds i8, i8* %1, i32 32
+  %7 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x2.v16i8.p0i8(i8* nonnull %6)
+  %8 = extractvalue { <16 x i8>, <16 x i8> } %7, 0
+  %9 = extractvalue { <16 x i8>, <16 x i8> } %7, 1
+  %10 = getelementptr inbounds i8, i8* %0, i32 32
+  tail call void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* nonnull %10, <16 x i8> %8, <16 x i8> %9)
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x2.v16i8.p0i8(i8*)
+
+define void @postinc_1x3(i8* nocapture %0, i8* %1) {
+; CHECK-LABEL: postinc_1x3:
+; CHECK:         add r2, r1, #48
+; CHECK-NEXT:    vld1.8 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r1:64]
+; CHECK-NEXT:    add r1, r0, #48
+; CHECK-NEXT:    vst1.8 {d16, d17, d18}, [r0:64]!
+; CHECK-NEXT:    vst1.8 {d19, d20, d21}, [r0:64]
+; CHECK-NEXT:    vld1.8 {d16, d17, d18}, [r2:64]!
+; CHECK-NEXT:    vld1.8 {d19, d20, d21}, [r2:64]
+; CHECK-NEXT:    vst1.8 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT:    vst1.8 {d19, d20, d21}, [r1:64]
+; CHECK-NEXT:    bx lr
+  %3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x3.v16i8.p0i8(i8* %1)
+  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %3, 0
+  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %3, 1
+  %a5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %3, 2
+  tail call void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* %0, <16 x i8> %4, <16 x i8> %5, <16 x i8> %a5)
+  %6 = getelementptr inbounds i8, i8* %1, i32 48
+  %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x3.v16i8.p0i8(i8* nonnull %6)
+  %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, 0
+  %9 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, 1
+  %a9 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, 2
+  %10 = getelementptr inbounds i8, i8* %0, i32 48
+  tail call void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* nonnull %10, <16 x i8> %8, <16 x i8> %9, <16 x i8> %a9)
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x3.v16i8.p0i8(i8*)
+
+define void @postinc_1x4(i8* nocapture %0, i8* %1) {
+; CHECK-LABEL: postinc_1x4:
+; CHECK:         add r2, r1, #64
+; CHECK-NEXT:    vld1.8 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r1:256]
+; CHECK-NEXT:    add r1, r0, #64
+; CHECK-NEXT:    vst1.8 {d16, d17, d18, d19}, [r0:256]!
+; CHECK-NEXT:    vst1.8 {d20, d21, d22, d23}, [r0:256]
+; CHECK-NEXT:    vld1.8 {d16, d17, d18, d19}, [r2:256]!
+; CHECK-NEXT:    vld1.8 {d20, d21, d22, d23}, [r2:256]
+; CHECK-NEXT:    vorr q15, q11, q11
+; CHECK-NEXT:    vorr q14, q10, q10
+; CHECK-NEXT:    vorr q13, q9, q9
+; CHECK-NEXT:    vorr q12, q8, q8
+; CHECK-NEXT:    vst1.8 {d24, d25, d26, d27}, [r1:256]!
+; CHECK-NEXT:    vst1.8 {d28, d29, d30, d31}, [r1:256]
+; CHECK-NEXT:    bx lr
+  %3 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x4.v16i8.p0i8(i8* %1)
+  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %3, 0
+  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %3, 1
+  %6 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %3, 2
+  %7 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %3, 3
+  tail call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* %0, <16 x i8> %4, <16 x i8> %5, <16 x i8> %6, <16 x i8> %7)
+  %8 = getelementptr inbounds i8, i8* %1, i32 64
+  %9 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x4.v16i8.p0i8(i8* nonnull %8)
+  %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 0
+  %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 1
+  %12 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 2
+  %13 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 3
+  %14 = getelementptr inbounds i8, i8* %0, i32 64
+  tail call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* nonnull %14, <16 x i8> %10, <16 x i8> %11, <16 x i8> %12, <16 x i8> %13)
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld1x4.v16i8.p0i8(i8*)


        


More information about the llvm-commits mailing list