[llvm] r368256 - [ARM] Tighten up VLDRH.32 with low alignments

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 7 23:22:03 PDT 2019


Author: dmgreen
Date: Wed Aug  7 23:22:03 2019
New Revision: 368256

URL: http://llvm.org/viewvc/llvm-project?rev=368256&view=rev
Log:
[ARM] Tighten up VLDRH.32 with low alignments

VLDRH needs to have an alignment of at least 2, including the
widening/narrowing versions. This tightens up the ISel patterns for it and
alters allowsMisalignedMemoryAccesses so that unaligned accesses are expanded
through the stack. It also fixed some incorrect shift amounts, which seemed to
be passing a multiple not a shift.

Differential Revision: https://reviews.llvm.org/D65580

Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
    llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll
    llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll
    llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll
    llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=368256&r1=368255&r2=368256&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Aug  7 23:22:03 2019
@@ -14066,11 +14066,18 @@ bool ARMTargetLowering::allowsMisaligned
     return true;
   }
 
+  // These are for truncated stores/narrowing loads. They are fine so long as
+  // the alignment is at least the size of the item being loaded
+  if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
+      Alignment >= VT.getScalarSizeInBits() / 8) {
+    if (Fast)
+      *Fast = true;
+    return true;
+  }
+
   if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
       Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
-      Ty != MVT::v2f64 &&
-      // These are for truncated stores
-      Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16)
+      Ty != MVT::v2f64)
     return false;
 
   if (Subtarget->isLittle()) {

Modified: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrMVE.td?rev=368256&r1=368255&r2=368256&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td Wed Aug  7 23:22:03 2019
@@ -4839,36 +4839,48 @@ let Predicates = [HasMVEInt, IsBE] in {
 
 // Widening/Narrowing Loads/Stores
 
+let MinAlignment = 2 in {
+  def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr),
+                                      (truncstorevi16 node:$val, node:$ptr)>;
+}
+
 let Predicates = [HasMVEInt] in {
-  def : Pat<(truncstorevi8  (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr),
-             (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
-  def : Pat<(truncstorevi8  (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
-             (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
-  def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr),
-             (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>;
+  def : Pat<(truncstorevi8  (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
+             (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
+  def : Pat<(truncstorevi8  (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
+             (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
+  def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
+             (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
+}
+
+
+let MinAlignment = 2 in {
+  def extloadvi16_align2  : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
+  def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
+  def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
 }
 
 multiclass MVEExtLoad<string DestLanes, string DestElemBits,
                       string SrcElemBits, string SrcElemType,
-                      Operand am> {
+                      string Align, Operand am> {
   def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
-                   (!cast<PatFrag>("extloadvi"  # SrcElemBits) am:$addr)),
+                   (!cast<PatFrag>("extloadvi"  # SrcElemBits # Align) am:$addr)),
                  (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
                    am:$addr)>;
   def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
-                   (!cast<PatFrag>("zextloadvi"  # SrcElemBits) am:$addr)),
+                   (!cast<PatFrag>("zextloadvi"  # SrcElemBits # Align) am:$addr)),
                  (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
                    am:$addr)>;
   def _S   : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
-                   (!cast<PatFrag>("sextloadvi"  # SrcElemBits) am:$addr)),
+                   (!cast<PatFrag>("sextloadvi"  # SrcElemBits # Align) am:$addr)),
                  (!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
                    am:$addr)>;
 }
 
 let Predicates = [HasMVEInt] in {
-  defm : MVEExtLoad<"4", "32", "8",  "B", t2addrmode_imm7<1>>;
-  defm : MVEExtLoad<"8", "16", "8",  "B", t2addrmode_imm7<1>>;
-  defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>;
+  defm : MVEExtLoad<"4", "32", "8",  "B", "", t2addrmode_imm7<0>>;
+  defm : MVEExtLoad<"8", "16", "8",  "B", "", t2addrmode_imm7<0>>;
+  defm : MVEExtLoad<"4", "32", "16", "H", "_align2", t2addrmode_imm7<1>>;
 }
 
 

Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll?rev=368256&r1=368255&r2=368256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-offset.ll Wed Aug  7 23:22:03 2019
@@ -148,8 +148,7 @@ entry:
 define i8* @ldrhu32_2(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrhu32_2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r2, r0, #2
-; CHECK-NEXT:    vldrh.u32 q0, [r2]
+; CHECK-NEXT:    vldrh.u32 q0, [r0, #2]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -233,8 +232,7 @@ entry:
 define i8* @ldrhs32_2(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrhs32_2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r2, r0, #2
-; CHECK-NEXT:    vldrh.s32 q0, [r2]
+; CHECK-NEXT:    vldrh.s32 q0, [r0, #2]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -380,8 +378,7 @@ entry:
 define i8* @ldrbu32_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbu32_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r2, r0, #3
-; CHECK-NEXT:    vldrb.u32 q0, [r2]
+; CHECK-NEXT:    vldrb.u32 q0, [r0, #3]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -448,8 +445,7 @@ entry:
 define i8* @ldrbs32_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbs32_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r2, r0, #3
-; CHECK-NEXT:    vldrb.s32 q0, [r2]
+; CHECK-NEXT:    vldrb.s32 q0, [r0, #3]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -516,8 +512,7 @@ entry:
 define i8* @ldrbu16_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbu16_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r2, r0, #3
-; CHECK-NEXT:    vldrb.u16 q0, [r2]
+; CHECK-NEXT:    vldrb.u16 q0, [r0, #3]
 ; CHECK-NEXT:    vstrh.16 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -584,8 +579,7 @@ entry:
 define i8* @ldrbs16_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbs16_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r2, r0, #3
-; CHECK-NEXT:    vldrb.s16 q0, [r2]
+; CHECK-NEXT:    vldrb.s16 q0, [r0, #3]
 ; CHECK-NEXT:    vstrh.16 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -758,9 +752,15 @@ entry:
 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrhi32_align1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r2, r0, #3
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    ldr.w r3, [r0, #7]
+; CHECK-NEXT:    ldr.w r2, [r0, #3]
+; CHECK-NEXT:    strd r2, r3, [sp]
+; CHECK-NEXT:    mov r2, sp
 ; CHECK-NEXT:    vldrh.s32 q0, [r2]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
@@ -952,8 +952,7 @@ define i8* @strh32_2(i8* %y, i8* %x) {
 ; CHECK-LABEL: strh32_2:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vldrh.u32 q0, [r1]
-; CHECK-NEXT:    adds r1, r0, #2
-; CHECK-NEXT:    vstrh.32 q0, [r1]
+; CHECK-NEXT:    vstrh.32 q0, [r0, #2]
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 2
@@ -1095,8 +1094,7 @@ define i8* @strb32_3(i8* %y, i8* %x) {
 ; CHECK-LABEL: strb32_3:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vldrb.u32 q0, [r1]
-; CHECK-NEXT:    adds r1, r0, #3
-; CHECK-NEXT:    vstrb.32 q0, [r1]
+; CHECK-NEXT:    vstrb.32 q0, [r0, #3]
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
@@ -1159,8 +1157,7 @@ define i8* @strb16_3(i8* %y, i8* %x) {
 ; CHECK-LABEL: strb16_3:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vldrb.u16 q0, [r1]
-; CHECK-NEXT:    adds r1, r0, #3
-; CHECK-NEXT:    vstrb.16 q0, [r1]
+; CHECK-NEXT:    vstrb.16 q0, [r0, #3]
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
@@ -1329,9 +1326,15 @@ entry:
 define i8* @strhi32_align1(i8* %y, i8* %x) {
 ; CHECK-LABEL: strhi32_align1:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    adds r1, r0, #3
+; CHECK-NEXT:    mov r1, sp
 ; CHECK-NEXT:    vstrh.32 q0, [r1]
+; CHECK-NEXT:    ldrd r1, r2, [sp]
+; CHECK-NEXT:    str.w r1, [r0, #3]
+; CHECK-NEXT:    str.w r2, [r0, #7]
+; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3

Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll?rev=368256&r1=368255&r2=368256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-postinc.ll Wed Aug  7 23:22:03 2019
@@ -774,9 +774,16 @@ entry:
 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrhi32_align1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldrh.s32 q0, [r0]
-; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    ldr r3, [r0, #4]
+; CHECK-NEXT:    ldr r2, [r0]
+; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    strd r2, r3, [sp]
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    vldrh.s32 q0, [r2]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
@@ -1360,9 +1367,16 @@ entry:
 define i8* @strhi32_align1(i8* %y, i8* %x) {
 ; CHECK-LABEL: strhi32_align1:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrh.32 q0, [r0]
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vstrh.32 q0, [r1]
+; CHECK-NEXT:    ldrd r1, r2, [sp]
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    str r2, [r0, #4]
 ; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3

Modified: llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll?rev=368256&r1=368255&r2=368256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-ldst-preinc.ll Wed Aug  7 23:22:03 2019
@@ -151,8 +151,8 @@ entry:
 define i8* @ldrhu32_2(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrhu32_2:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.u32 q0, [r0, #2]
 ; CHECK-NEXT:    adds r0, #2
-; CHECK-NEXT:    vldrh.u32 q0, [r0]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -237,8 +237,8 @@ entry:
 define i8* @ldrhs32_2(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrhs32_2:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrh.s32 q0, [r0, #2]
 ; CHECK-NEXT:    adds r0, #2
-; CHECK-NEXT:    vldrh.s32 q0, [r0]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -387,8 +387,8 @@ entry:
 define i8* @ldrbu32_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbu32_3:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u32 q0, [r0, #3]
 ; CHECK-NEXT:    adds r0, #3
-; CHECK-NEXT:    vldrb.u32 q0, [r0]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -456,8 +456,8 @@ entry:
 define i8* @ldrbs32_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbs32_3:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.s32 q0, [r0, #3]
 ; CHECK-NEXT:    adds r0, #3
-; CHECK-NEXT:    vldrb.s32 q0, [r0]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -525,8 +525,8 @@ entry:
 define i8* @ldrbu16_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbu16_3:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.u16 q0, [r0, #3]
 ; CHECK-NEXT:    adds r0, #3
-; CHECK-NEXT:    vldrb.u16 q0, [r0]
 ; CHECK-NEXT:    vstrh.16 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -594,8 +594,8 @@ entry:
 define i8* @ldrbs16_3(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrbs16_3:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrb.s16 q0, [r0, #3]
 ; CHECK-NEXT:    adds r0, #3
-; CHECK-NEXT:    vldrb.s16 q0, [r0]
 ; CHECK-NEXT:    vstrh.16 q0, [r1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -774,9 +774,16 @@ entry:
 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
 ; CHECK-LABEL: ldrhi32_align1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r0, #3
-; CHECK-NEXT:    vldrh.s32 q0, [r0]
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    ldr r2, [r0, #3]!
+; CHECK-NEXT:    str r2, [sp]
+; CHECK-NEXT:    ldr r2, [r0, #4]
+; CHECK-NEXT:    str r2, [sp, #4]
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    vldrh.s32 q0, [r2]
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %x, i32 3
@@ -972,9 +979,9 @@ entry:
 define i8* @strh32_2(i8* %y, i8* %x) {
 ; CHECK-LABEL: strh32_2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r0, #2
 ; CHECK-NEXT:    vldrh.u32 q0, [r1]
-; CHECK-NEXT:    vstrh.32 q0, [r0]
+; CHECK-NEXT:    vstrh.32 q0, [r0, #2]
+; CHECK-NEXT:    adds r0, #2
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 2
@@ -1118,9 +1125,9 @@ entry:
 define i8* @strb32_3(i8* %y, i8* %x) {
 ; CHECK-LABEL: strb32_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r0, #3
 ; CHECK-NEXT:    vldrb.u32 q0, [r1]
-; CHECK-NEXT:    vstrb.32 q0, [r0]
+; CHECK-NEXT:    vstrb.32 q0, [r0, #3]
+; CHECK-NEXT:    adds r0, #3
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
@@ -1183,9 +1190,9 @@ entry:
 define i8* @strb16_3(i8* %y, i8* %x) {
 ; CHECK-LABEL: strb16_3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r0, #3
 ; CHECK-NEXT:    vldrb.u16 q0, [r1]
-; CHECK-NEXT:    vstrb.16 q0, [r0]
+; CHECK-NEXT:    vstrb.16 q0, [r0, #3]
+; CHECK-NEXT:    adds r0, #3
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3
@@ -1360,9 +1367,15 @@ entry:
 define i8* @strhi32_align1(i8* %y, i8* %x) {
 ; CHECK-LABEL: strhi32_align1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adds r0, #3
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
-; CHECK-NEXT:    vstrh.32 q0, [r0]
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vstrh.32 q0, [r1]
+; CHECK-NEXT:    ldrd r1, r2, [sp]
+; CHECK-NEXT:    str r1, [r0, #3]!
+; CHECK-NEXT:    str r2, [r0, #4]
+; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    bx lr
 entry:
   %z = getelementptr inbounds i8, i8* %y, i32 3

Modified: llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll?rev=368256&r1=368255&r2=368256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll Wed Aug  7 23:22:03 2019
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
 
 define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_int8_int32:
@@ -14,7 +14,6 @@ entry:
   ret void
 }
 
-
 define void @foo_int16_int32(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_int16_int32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -28,7 +27,6 @@ entry:
   ret void
 }
 
-
 define void @foo_int8_int16(<8 x i8>* %dest, <8 x i16>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_int8_int16:
 ; CHECK:       @ %bb.0: @ %entry
@@ -42,7 +40,6 @@ entry:
   ret void
 }
 
-
 define void @foo_int32_int8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_int32_int8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -56,7 +53,6 @@ entry:
   ret void
 }
 
-
 define void @foo_int16_int8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_int16_int8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -70,7 +66,6 @@ entry:
   ret void
 }
 
-
 define void @foo_int32_int16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_int32_int16:
 ; CHECK:       @ %bb.0: @ %entry
@@ -84,7 +79,6 @@ entry:
   ret void
 }
 
-
 define void @foo_uint32_uint8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_uint32_uint8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -98,7 +92,6 @@ entry:
   ret void
 }
 
-
 define void @foo_uint16_uint8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_uint16_uint8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -112,7 +105,6 @@ entry:
   ret void
 }
 
-
 define void @foo_uint32_uint16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
 ; CHECK-LABEL: foo_uint32_uint16:
 ; CHECK:       @ %bb.0: @ %entry
@@ -124,4 +116,67 @@ entry:
   %0 = zext <4 x i16> %wide.load to <4 x i32>
   store <4 x i32> %0, <4 x i32>* %dest, align 4
   ret void
+}
+
+
+
+
+define void @foo_int16_int32_align1(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
+; CHECK-LABEL: foo_int16_int32_align1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vstrh.32 q0, [r1]
+; CHECK-NEXT:    ldrd r1, r2, [sp]
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    str r2, [r0, #4]
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    bx lr
+entry:
+  %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
+  %0 = trunc <4 x i32> %wide.load to <4 x i16>
+  store <4 x i16> %0, <4 x i16>* %dest, align 1
+  ret void
+}
+
+define void @foo_int32_int16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
+; CHECK-LABEL: foo_int32_int16_align1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    ldr r2, [r1]
+; CHECK-NEXT:    ldr r1, [r1, #4]
+; CHECK-NEXT:    strd r2, r1, [sp]
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vldrh.s32 q0, [r1]
+; CHECK-NEXT:    vstrw.32 q0, [r0]
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    bx lr
+entry:
+  %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
+  %0 = sext <4 x i16> %wide.load to <4 x i32>
+  store <4 x i32> %0, <4 x i32>* %dest, align 4
+  ret void
+}
+
+define void @foo_uint32_uint16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
+; CHECK-LABEL: foo_uint32_uint16_align1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    ldr r2, [r1]
+; CHECK-NEXT:    ldr r1, [r1, #4]
+; CHECK-NEXT:    strd r2, r1, [sp]
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vldrh.u32 q0, [r1]
+; CHECK-NEXT:    vstrw.32 q0, [r0]
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    bx lr
+entry:
+  %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
+  %0 = zext <4 x i16> %wide.load to <4 x i32>
+  store <4 x i32> %0, <4 x i32>* %dest, align 4
+  ret void
 }




More information about the llvm-commits mailing list