[llvm-bugs] [Bug 34896] New: Invalid vldmia/vstmia encoding

Mon Oct 9 17:18:07 PDT 2017

https://bugs.llvm.org/show_bug.cgi?id=34896

            Bug ID: 34896
           Summary: Invalid vldmia/vstmia encoding
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: ARM
          Assignee: unassignedbugs at nondot.org
          Reporter: yyc1992 at gmail.com
                CC: llvm-bugs at lists.llvm.org

The ARM instructions vldmia and vstmia can technically encode register
ranges that are invalid (unpredictable): a register list may contain at most
16 of the registers d0-d31. However, it seems that LLVM can emit invalid ones
for certain code due to load-store optimizations.

The IR to reproduce is

```
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv7l-unknown-linux-gnueabihf"

define void @f(<34 x double> addrspace(10)* %p0, <34 x double>* %p1) {
L10:
  %v = load <34 x double>, <34 x double> addrspace(10)* %p0, align 16
  store <34 x double> %v, <34 x double>* %p1, align 16
  ret void
}
```

When running `llc` with `neon` disabled (important: the problem does not show
up with neon enabled!), the output is

```
% llc -mattr=-neon l.ll -o -
        .text
        .syntax unified
        .eabi_attribute 67, "2.09"      @ Tag_conformance
        .eabi_attribute 6, 10   @ Tag_CPU_arch
        .eabi_attribute 7, 65   @ Tag_CPU_arch_profile
        .eabi_attribute 8, 1    @ Tag_ARM_ISA_use
        .eabi_attribute 9, 2    @ Tag_THUMB_ISA_use
        .fpu    vfpv3
        .eabi_attribute 34, 1   @ Tag_CPU_unaligned_access
        .eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use
        .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
        .eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions
        .eabi_attribute 23, 3   @ Tag_ABI_FP_number_model
        .eabi_attribute 24, 1   @ Tag_ABI_align_needed
        .eabi_attribute 25, 1   @ Tag_ABI_align_preserved
        .eabi_attribute 28, 1   @ Tag_ABI_VFP_args
        .eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format
        .eabi_attribute 14, 0   @ Tag_ABI_PCS_R9_use
        .file   "l.ll"
        .globl  f                       @ -- Begin function f
        .p2align        2
        .type   f,%function
        .code   32                      @ @f
f:
        .fnstart
@ BB#0:                                 @ %L10
        .vsave  {d8, d9, d10, d11, d12, d13, d14, d15}
        vpush   {d8, d9, d10, d11, d12, d13, d14, d15}
        .pad    #16
        sub     sp, sp, #16
        vldr    d16, [r0]
        add     r2, r0, #16
        vldr    d17, [r0, #264]
        vstr    d16, [sp, #8]           @ 8-byte Spill
        vldr    d16, [r0, #8]
        vstr    d16, [sp]               @ 8-byte Spill
        vldmia  r2, {d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29,
d30, d31}
        add     r2, r0, #120
        vldmia  r2, {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12,
d13, d14, d15, d16}
        vldr    d18, [r0, #256]
        add     r0, r1, #64
        vstr    d18, [r1, #256]
        vstr    d17, [r1, #264]
        vstmia  r0, {d25, d26, d27, d28, d29, d30, d31}
        add     r0, r1, #120
        vstmia  r0, {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12,
d13, d14, d15, d16}
        add     r0, r1, #16
        vldr    d16, [sp, #8]           @ 8-byte Reload
        vstr    d16, [r1]
        vldr    d16, [sp]               @ 8-byte Reload
        vstr    d16, [r1, #8]
        vstmia  r0, {d19, d20, d21, d22, d23, d24}
        add     sp, sp, #16
        vpop    {d8, d9, d10, d11, d12, d13, d14, d15}
        bx      lr
.Lfunc_end0:
        .size   f, .Lfunc_end0-f
        .fnend
                                        @ -- End function

        .section        ".note.GNU-stack","",%progbits
        .eabi_attribute 30, 1   @ Tag_ABI_optimization_goals
```

Notice that the `vstmia  r0, {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11,
d12, d13, d14, d15, d16}` and `vldmia  r2, {d0, d1, d2, d3, d4, d5, d6, d7, d8,
d9, d10, d11, d12, d13, d14, d15, d16}` are encodable but are not valid on
current ARM architectures, since each of them lists 17 vector/floating-point
registers, one more than the maximum of 16.

The instruction is created by `ARMLoadStoreOpt::CreateLoadStoreMulti`, which
was given an input register list that is too large. I am not sure whether the
caller or this function is to blame...

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20171010/fe5f4fe4/attachment.html>