[llvm] r368996 - [ARM] Fix alignment checks for BE VLDRH
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 15 05:54:47 PDT 2019
Author: dmgreen
Date: Thu Aug 15 05:54:47 2019
New Revision: 368996
URL: http://llvm.org/viewvc/llvm-project?rev=368996&view=rev
Log:
[ARM] Fix alignment checks for BE VLDRH
We need to allow any alignment of at least 2, not just exactly 2, so that the
big-endian loads and stores can be selected successfully. I've also added extra
BE testing to the load and store tests.
Thanks to Oliver for the report.
Differential Revision: https://reviews.llvm.org/D66222
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll
llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll
llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll
Modified: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrMVE.td?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td Thu Aug 15 05:54:47 2019
@@ -4846,11 +4846,11 @@ def aligned32_post_store : PatFrag<(ops
}]>;
def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(pre_store node:$val, node:$ptr, node:$offset), [{
- return cast<StoreSDNode>(N)->getAlignment() == 2;
+ return cast<StoreSDNode>(N)->getAlignment() >= 2;
}]>;
def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(post_store node:$val, node:$ptr, node:$offset), [{
- return cast<StoreSDNode>(N)->getAlignment() == 2;
+ return cast<StoreSDNode>(N)->getAlignment() >= 2;
}]>;
let Predicates = [HasMVEInt, IsLE] in {
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll Thu Aug 15 05:54:47 2019
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
@@ -720,11 +721,18 @@ entry:
}
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i32>*
@@ -735,11 +743,18 @@ entry:
}
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i16>*
@@ -772,12 +787,19 @@ entry:
ret i8* %x
}
-define i8* @ldrwf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+define i8* @ldrf32_align1(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x float>*
@@ -787,12 +809,19 @@ entry:
ret i8* %x
}
-define i8* @ldrwf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+define i8* @ldrf16_align1(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x half>*
@@ -802,6 +831,27 @@ entry:
ret i8* %x
}
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %z to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 8
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 2
+ ret i8* %x
+}
+
@@ -1294,11 +1344,18 @@ entry:
}
define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -1309,11 +1366,18 @@ entry:
}
define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1347,11 +1411,18 @@ entry:
}
define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -1362,11 +1433,18 @@ entry:
}
define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -1375,3 +1453,24 @@ entry:
store <8 x half> %1, <8 x half>* %2, align 1
ret i8* %y
}
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %y, i32 16
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 2
+ %2 = bitcast i8* %z to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 8
+ ret i8* %y
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll Thu Aug 15 05:54:47 2019
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
@@ -17,11 +18,18 @@ entry:
}
define i8* @ldrwu32_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwu32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwu32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwu32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -291,11 +299,18 @@ entry:
}
define i8* @ldrhu16_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhu16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhu16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhu16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -708,11 +723,19 @@ entry:
}
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -723,11 +746,19 @@ entry:
}
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -762,11 +793,19 @@ entry:
}
define i8* @ldrf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -777,11 +816,19 @@ entry:
}
define i8* @ldrf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -791,16 +838,43 @@ entry:
ret i8* %z
}
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0], #4
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0], #4
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 8
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 2
+ ret i8* %z
+}
+
define i8* @strw32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i32>*
@@ -811,11 +885,18 @@ entry:
}
define i8* @strw32_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -826,11 +907,17 @@ entry:
}
define i8* @strw32_m4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_m4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #-4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_m4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_m4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -4
%0 = bitcast i8* %x to <4 x i32>*
@@ -982,11 +1069,17 @@ entry:
define i8* @strh16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i16>*
@@ -997,11 +1090,18 @@ entry:
}
define i8* @strh16_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1012,11 +1112,17 @@ entry:
}
define i8* @strh16_2(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #2
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_2:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_2:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
%0 = bitcast i8* %x to <8 x i16>*
@@ -1244,11 +1350,17 @@ entry:
}
define i8* @strf32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x float>*
@@ -1259,11 +1371,17 @@ entry:
}
define i8* @strf16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x half>*
@@ -1274,11 +1392,19 @@ entry:
}
define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -1289,11 +1415,19 @@ entry:
}
define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1328,11 +1462,19 @@ entry:
}
define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -1343,11 +1485,19 @@ entry:
}
define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -1356,3 +1506,24 @@ entry:
store <8 x half> %1, <8 x half>* %2, align 1
ret i8* %z
}
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #16
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %y, i32 16
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 2
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 8
+ ret i8* %z
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll?rev=368996&r1=368995&r2=368996&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll Thu Aug 15 05:54:47 2019
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
@@ -17,11 +18,18 @@ entry:
}
define i8* @ldrwu32_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwu32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwu32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwu32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i32>*
@@ -291,11 +299,18 @@ entry:
}
define i8* @ldrhu16_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhu16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhu16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhu16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i16>*
@@ -708,11 +723,19 @@ entry:
}
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i32>*
@@ -723,11 +746,19 @@ entry:
}
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i16>*
@@ -762,11 +793,19 @@ entry:
}
define i8* @ldrf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x float>*
@@ -777,11 +816,19 @@ entry:
}
define i8* @ldrf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x half>*
@@ -791,16 +838,43 @@ entry:
ret i8* %z
}
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]!
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %z to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 8
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 2
+ ret i8* %z
+}
+
define i8* @strw32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i32>*
@@ -811,11 +885,18 @@ entry:
}
define i8* @strw32_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -826,11 +907,17 @@ entry:
}
define i8* @strw32_m4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_m4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #-4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_m4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #-4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_m4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0, #-4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -4
%0 = bitcast i8* %x to <4 x i32>*
@@ -982,11 +1069,17 @@ entry:
define i8* @strh16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i16>*
@@ -997,11 +1090,18 @@ entry:
}
define i8* @strh16_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1012,11 +1112,17 @@ entry:
}
define i8* @strh16_2(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #2]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_2:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #2]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_2:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #2]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
%0 = bitcast i8* %x to <8 x i16>*
@@ -1244,11 +1350,17 @@ entry:
}
define i8* @strf32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x float>*
@@ -1259,11 +1371,17 @@ entry:
}
define i8* @strf16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x half>*
@@ -1274,11 +1392,19 @@ entry:
}
define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -1289,11 +1415,19 @@ entry:
}
define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1327,11 +1461,19 @@ entry:
}
define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -1342,11 +1484,19 @@ entry:
}
define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -1355,3 +1505,24 @@ entry:
store <8 x half> %1, <8 x half>* %2, align 1
ret i8* %z
}
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #16]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]!
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %y, i32 16
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 2
+ %2 = bitcast i8* %z to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 8
+ ret i8* %z
+}
More information about the llvm-commits mailing list