[llvm-bugs] [Bug 30947] New: movdqa32 being used on a 16-byte aligned address on cannonlake

Tue Nov 8 10:32:42 PST 2016

https://llvm.org/bugs/show_bug.cgi?id=30947

            Bug ID: 30947
           Summary: movdqa32 being used on a 16-byte aligned address on
                    cannonlake
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: andrew.b.adams at gmail.com
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

llc is emitting vmovdqa32 with a ymm register for a store whose address is
only 16-byte aligned. I don't actually have a cannonlake CPU - I'm using
Intel's SDE to emulate one. SDE faults on this instruction with an alignment
error:

SDE ERROR:  TID: 0 executed instruction with an unaligned memory reference to
address 0x1da11770 INSTR: 0x01e4a09e8: IFORM:
VMOVDQA32_MEMu32_MASKmskw_YMMu32_AVX512 :: vmovdqa32 ymmword ptr [rax+rsi*4],
ymm4

Reading the Intel docs, I don't think it's a bug in SDE. vmovdqa32 requires the
memory operand to be aligned to the full width of the vector being stored, and
a ymm register is 32 bytes, not 16.

Here's my test.ll:

define void @fn(<16 x i32> %a1, <16 x i32> %a2, <8 x i32>* %b1, <8 x i32>* %b2,
<8 x i32>* %b3, <8 x i32>* %b4) {
       %cmp = icmp slt <16 x i32> %a1, %a2
       %mix = select <16 x i1> %cmp, <16 x i32>  <i32 1, i32 1, i32 1, i32 1,
i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32
undef, i32 undef, i32 undef, i32 undef>, <16 x i32> <i32 0, i32 0, i32 0, i32
0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32
undef, i32 undef, i32 undef, i32 undef>
       %lo8 = shufflevector <16 x i32> %mix, <16 x i32> undef, <8 x i32> <i32
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
       store <8 x i32> %lo8, <8 x i32>* %b1, align 8
       store <8 x i32> %lo8, <8 x i32>* %b2, align 16
       store <8 x i32> %lo8, <8 x i32>* %b3, align 32
       store <8 x i32> %lo8, <8 x i32>* %b4, align 64
       ret void
}

Compiled with: llc test.ll -mcpu=cannonlake -O3 -filetype=asm -o - 

fn:                                     # @fn
# BB#0:
        vmovdqa32       (%rdx), %zmm0
        movq    48(%rsp), %rax
        movq    40(%rsp), %rdx
        vpcmpgtd        (%rcx), %zmm0, %k1
        vpbroadcastd    .LCPI0_0(%rip), %zmm0 {%k1} {z}
        vmovdqu32       %ymm0, (%r8)
        vmovdqa32       %ymm0, (%r9) /// This instruction should be vmovdqu32: (%r9) is %b2, which is only "align 16"
        vmovdqa32       %ymm0, (%rdx)
        vmovdqa32       %ymm0, (%rax)
        retq

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20161108/6a0c30b2/attachment.html>