[llvm] r368996 - [ARM] Fix alignment checks for BE VLDRH

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 15 05:54:47 PDT 2019


Author: dmgreen
Date: Thu Aug 15 05:54:47 2019
New Revision: 368996

URL: http://llvm.org/viewvc/llvm-project?rev=368996&view=rev
Log:
[ARM] Fix alignment checks for BE VLDRH

We need to allow any alignment of at least 2, not just exactly 2, so that the
big-endian loads and stores can be selected successfully. I've also added extra
BE testing to the load and store tests.

Thanks to Oliver for the report.

Differential Revision: https://reviews.llvm.org/D66222

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
    llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll
    llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll
    llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll

Modified: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrMVE.td?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td Thu Aug 15 05:54:47 2019
@@ -4846,11 +4846,11 @@ def aligned32_post_store : PatFrag<(ops
 }]>;
 def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
                                   (pre_store node:$val, node:$ptr, node:$offset), [{
-  return cast<StoreSDNode>(N)->getAlignment() == 2;
+  return cast<StoreSDNode>(N)->getAlignment() >= 2;
 }]>;
 def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
                                   (post_store node:$val, node:$ptr, node:$offset), [{
-  return cast<StoreSDNode>(N)->getAlignment() == 2;
+  return cast<StoreSDNode>(N)->getAlignment() >= 2;
 }]>;
 
 let Predicates = [HasMVEInt, IsLE] in {

Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll Thu Aug 15 05:54:47 2019
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define i8* @ldrwu32_4(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrwu32_4:
@@ -720,11 +721,18 @@ entry:
 }
 
 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <4 x i32>*
@@ -735,11 +743,18 @@ entry:
 }
 
 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <8 x i16>*
@@ -772,12 +787,19 @@ entry:
   ret i8* %x
 }
 
-define i8* @ldrwf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwf32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+define i8* @ldrf32_align1(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <4 x float>*
@@ -787,12 +809,19 @@ entry:
   ret i8* %x
 }
 
-define i8* @ldrwf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwf16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+define i8* @ldrf16_align1(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <8 x half>*
@@ -802,6 +831,27 @@ entry:
   ret i8* %x
 }
 
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #4]
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r0, #4]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %z to <8 x i16>*
+  %1 = load <8 x i16>, <8 x i16>* %0, align 8
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %x
+}
+
 
 
 
@@ -1294,11 +1344,18 @@ entry:
 }
 
 define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x i32>*
@@ -1309,11 +1366,18 @@ entry:
 }
 
 define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x i16>*
@@ -1347,11 +1411,18 @@ entry:
 }
 
 define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x float>*
@@ -1362,11 +1433,18 @@ entry:
 }
 
 define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x half>*
@@ -1375,3 +1453,24 @@ entry:
   store <8 x half> %1, <8 x half>* %2, align 1
   ret i8* %y
 }
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrw.32 q0, [r0, #16]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #16]
+; CHECK-BE-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 16
+  %0 = bitcast i8* %x to <8 x i16>*
+  %1 = load <8 x i16>, <8 x i16>* %0, align 2
+  %2 = bitcast i8* %z to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 8
+  ret i8* %y
+}

Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll Thu Aug 15 05:54:47 2019
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define i8* @ldrwu32_4(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrwu32_4:
@@ -17,11 +18,18 @@ entry:
 }
 
 define i8* @ldrwu32_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwu32_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrwu32_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrwu32_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %x to <4 x i32>*
@@ -291,11 +299,18 @@ entry:
 }
 
 define i8* @ldrhu16_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhu16_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrhu16_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrhu16_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %x to <8 x i16>*
@@ -708,11 +723,19 @@ entry:
 }
 
 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %x to <4 x i32>*
@@ -723,11 +746,19 @@ entry:
 }
 
 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %x to <8 x i16>*
@@ -762,11 +793,19 @@ entry:
 }
 
 define i8* @ldrf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %x to <4 x float>*
@@ -777,11 +816,19 @@ entry:
 }
 
 define i8* @ldrf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %x to <8 x half>*
@@ -791,16 +838,43 @@ entry:
   ret i8* %z
 }
 
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r0], #4
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r0], #4
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %x to <8 x i16>*
+  %1 = load <8 x i16>, <8 x i16>* %0, align 8
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
 
 
 
 
 define i8* @strw32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #4
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strw32_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strw32_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0], #4
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <4 x i32>*
@@ -811,11 +885,18 @@ entry:
 }
 
 define i8* @strw32_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #3
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strw32_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strw32_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x i32>*
@@ -826,11 +907,17 @@ entry:
 }
 
 define i8* @strw32_m4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_m4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #-4
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strw32_m4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #-4
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strw32_m4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0], #-4
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 -4
   %0 = bitcast i8* %x to <4 x i32>*
@@ -982,11 +1069,17 @@ entry:
 
 
 define i8* @strh16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #4
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strh16_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strh16_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #4
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <8 x i16>*
@@ -997,11 +1090,18 @@ entry:
 }
 
 define i8* @strh16_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #3
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strh16_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strh16_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x i16>*
@@ -1012,11 +1112,17 @@ entry:
 }
 
 define i8* @strh16_2(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_2:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #2
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strh16_2:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #2
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strh16_2:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #2
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 2
   %0 = bitcast i8* %x to <8 x i16>*
@@ -1244,11 +1350,17 @@ entry:
 }
 
 define i8* @strf32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #4
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf32_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf32_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0], #4
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <4 x float>*
@@ -1259,11 +1371,17 @@ entry:
 }
 
 define i8* @strf16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #4
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf16_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #4
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <8 x half>*
@@ -1274,11 +1392,19 @@ entry:
 }
 
 define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #3
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x i32>*
@@ -1289,11 +1415,19 @@ entry:
 }
 
 define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #3
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x i16>*
@@ -1328,11 +1462,19 @@ entry:
 }
 
 define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #3
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x float>*
@@ -1343,11 +1485,19 @@ entry:
 }
 
 define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0], #3
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x half>*
@@ -1356,3 +1506,24 @@ entry:
   store <8 x half> %1, <8 x half>* %2, align 1
   ret i8* %z
 }
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #16
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #16
+; CHECK-BE-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 16
+  %0 = bitcast i8* %x to <8 x i16>*
+  %1 = load <8 x i16>, <8 x i16>* %0, align 2
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 8
+  ret i8* %z
+}

Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll Thu Aug 15 05:54:47 2019
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 
 define i8* @ldrwu32_4(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrwu32_4:
@@ -17,11 +18,18 @@ entry:
 }
 
 define i8* @ldrwu32_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwu32_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrwu32_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrwu32_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <4 x i32>*
@@ -291,11 +299,18 @@ entry:
 }
 
 define i8* @ldrhu16_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhu16_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrhu16_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrhu16_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <8 x i16>*
@@ -708,11 +723,19 @@ entry:
 }
 
 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <4 x i32>*
@@ -723,11 +746,19 @@ entry:
 }
 
 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <8 x i16>*
@@ -762,11 +793,19 @@ entry:
 }
 
 define i8* @ldrf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <4 x float>*
@@ -777,11 +816,19 @@ entry:
 }
 
 define i8* @ldrf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT:    vstrh.16 q0, [r1]
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
   %0 = bitcast i8* %z to <8 x half>*
@@ -791,16 +838,43 @@ entry:
   ret i8* %z
 }
 
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #4]!
+; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r0, #4]!
+; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
+; CHECK-BE-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %x, i32 4
+  %0 = bitcast i8* %z to <8 x i16>*
+  %1 = load <8 x i16>, <8 x i16>* %0, align 8
+  %2 = bitcast i8* %y to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 2
+  ret i8* %z
+}
+
 
 
 
 
 define i8* @strw32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strw32_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strw32_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #4]!
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <4 x i32>*
@@ -811,11 +885,18 @@ entry:
 }
 
 define i8* @strw32_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strw32_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strw32_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x i32>*
@@ -826,11 +907,17 @@ entry:
 }
 
 define i8* @strw32_m4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_m4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #-4]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strw32_m4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #-4]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strw32_m4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #-4]!
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 -4
   %0 = bitcast i8* %x to <4 x i32>*
@@ -982,11 +1069,17 @@ entry:
 
 
 define i8* @strh16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strh16_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strh16_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #4]!
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <8 x i16>*
@@ -997,11 +1090,18 @@ entry:
 }
 
 define i8* @strh16_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_3:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strh16_3:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strh16_3:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x i16>*
@@ -1012,11 +1112,17 @@ entry:
 }
 
 define i8* @strh16_2(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_2:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #2]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strh16_2:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #2]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strh16_2:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #2]!
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 2
   %0 = bitcast i8* %x to <8 x i16>*
@@ -1244,11 +1350,17 @@ entry:
 }
 
 define i8* @strf32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf32_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf32_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #4]!
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <4 x float>*
@@ -1259,11 +1371,17 @@ entry:
 }
 
 define i8* @strf16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_4:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf16_4:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_4:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #4]!
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 4
   %0 = bitcast i8* %x to <8 x half>*
@@ -1274,11 +1392,19 @@ entry:
 }
 
 define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x i32>*
@@ -1289,11 +1415,19 @@ entry:
 }
 
 define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x i16>*
@@ -1327,11 +1461,19 @@ entry:
 }
 
 define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT:    vrev32.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <4 x float>*
@@ -1342,11 +1484,19 @@ entry:
 }
 
 define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT:    bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vrev16.8 q0, q0
+; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT:    adds r0, #3
+; CHECK-BE-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
   %0 = bitcast i8* %x to <8 x half>*
@@ -1355,3 +1505,24 @@ entry:
   store <8 x half> %1, <8 x half>* %2, align 1
   ret i8* %z
 }
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #16]!
+; CHECK-LE-NEXT:    bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #16]!
+; CHECK-BE-NEXT:    bx lr
+entry:
+  %z = getelementptr inbounds i8, i8* %y, i32 16
+  %0 = bitcast i8* %x to <8 x i16>*
+  %1 = load <8 x i16>, <8 x i16>* %0, align 2
+  %2 = bitcast i8* %z to <8 x i16>*
+  store <8 x i16> %1, <8 x i16>* %2, align 8
+  ret i8* %z
+}




More information about the llvm-commits mailing list