<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Invalid vldmia/vsdmia encoding"

   href="https://bugs.llvm.org/show_bug.cgi?id=34896">34896</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Invalid vldmia/vsdmia encoding

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: ARM

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>yyc1992@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>The ARM instructions vldmia and vstmia can technically encode register

ranges that are invalid (unpredictable). (only up to 16 within 0-31 are

allowed). However, it seems that LLVM can emit invalid ones for certain code

due to load-store optimizations.

The IR to reproduce is

```

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

target triple = "armv7l-unknown-linux-gnueabihf"

define void @f(<34 x double> addrspace(10)* %p0, <34 x double>* %p1) {

L10:

  %v = load <34 x double>, <34 x double> addrspace(10)* %p0, align 16

  store <34 x double> %v, <34 x double>* %p1, align 16

  ret void

}

```

When running with `neon` disabled (important, doesn't show up with neon

enabled!!!)

```

% llc -mattr=-neon l.ll -o -

        .text

        .syntax unified

        .eabi_attribute 67, "2.09"      @ Tag_conformance

        .eabi_attribute 6, 10   @ Tag_CPU_arch

        .eabi_attribute 7, 65   @ Tag_CPU_arch_profile

        .eabi_attribute 8, 1    @ Tag_ARM_ISA_use

        .eabi_attribute 9, 2    @ Tag_THUMB_ISA_use

        .fpu    vfpv3

        .eabi_attribute 34, 1   @ Tag_CPU_unaligned_access

        .eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use

        .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal

        .eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions

        .eabi_attribute 23, 3   @ Tag_ABI_FP_number_model

        .eabi_attribute 24, 1   @ Tag_ABI_align_needed

        .eabi_attribute 25, 1   @ Tag_ABI_align_preserved

        .eabi_attribute 28, 1   @ Tag_ABI_VFP_args

        .eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format

        .eabi_attribute 14, 0   @ Tag_ABI_PCS_R9_use

        .file   "l.ll"

        .globl  f                       @ -- Begin function f

        .p2align        2

        .type   f,%function

        .code   32                      @ @f

f:

        .fnstart

@ BB#0:                                 @ %L10

        .vsave  {d8, d9, d10, d11, d12, d13, d14, d15}

        vpush   {d8, d9, d10, d11, d12, d13, d14, d15}

        .pad    #16

        sub     sp, sp, #16

        vldr    d16, [r0]

        add     r2, r0, #16

        vldr    d17, [r0, #264]

        vstr    d16, [sp, #8]           @ 8-byte Spill

        vldr    d16, [r0, #8]

        vstr    d16, [sp]               @ 8-byte Spill

        vldmia  r2, {d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29,

d30, d31}

        add     r2, r0, #120

        vldmia  r2, {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12,

d13, d14, d15, d16}

        vldr    d18, [r0, #256]

        add     r0, r1, #64

        vstr    d18, [r1, #256]

        vstr    d17, [r1, #264]

        vstmia  r0, {d25, d26, d27, d28, d29, d30, d31}

        add     r0, r1, #120

        vstmia  r0, {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12,

d13, d14, d15, d16}

        add     r0, r1, #16

        vldr    d16, [sp, #8]           @ 8-byte Reload

        vstr    d16, [r1]

        vldr    d16, [sp]               @ 8-byte Reload

        vstr    d16, [r1, #8]

        vstmia  r0, {d19, d20, d21, d22, d23, d24}

        add     sp, sp, #16

        vpop    {d8, d9, d10, d11, d12, d13, d14, d15}

        bx      lr

.Lfunc_end0:

        .size   f, .Lfunc_end0-f

        .fnend

                                        @ -- End function

        .section        ".note.GNU-stack","",%progbits

        .eabi_attribute 30, 1   @ Tag_ABI_optimization_goals

```

Notice that the `vstmia  r0, {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11,

d12, d13, d14, d15, d16}` and `vldmia  r2, {d0, d1, d2, d3, d4, d5, d6, d7, d8,

d9, d10, d11, d12, d13, d14, d15, d16}` are encodable but are not valid on

current ARM architectures since they each have 17 vector/floating point

registers.

The instruction is created by `ARMLoadStoreOpt::CreateLoadStoreMulti` which

took an input register list of a large size. Not sure if the caller or this

function is to be blamed...</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>