[llvm-bugs] [Bug 31363] New: Lowering operations to compiler intrinsics with reference parameters generates bad code

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Dec 13 11:21:33 PST 2016


https://llvm.org/bugs/show_bug.cgi?id=31363

            Bug ID: 31363
           Summary: Lowering operations to compiler intrinsics with
                    reference parameters generates bad code
           Product: libraries
           Version: 3.9
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: Common Code Generator Code
          Assignee: unassignedbugs at nondot.org
          Reporter: simonas+llvm.org at kazlauskas.me
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

Consider this snippet of LLVM IR:

```llvm
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc"

; Function Attrs: nounwind uwtable
define void @test3(i128* noalias nocapture sret dereferenceable(16), i128*
noalias readonly nocapture dereferenceable(16), i128* noalias readonly
nocapture dereferenceable(16)) unnamed_addr #0 {
start:
  %3 = load i128, i128* %1
  %4 = load i128, i128* %2
  %5 = udiv i128 %3, %4
  store i128 %5, i128* %0
  ret void
}

attributes #0 = { nounwind uwtable }
```

This generates the following assembly:

```asm
test3:
    push    rsi
    .seh_pushreg 6
    sub    rsp, 64
    .seh_stackalloc 64
    .seh_endprologue
    mov    rsi, rcx
    mov    r9, qword ptr [rdx]
    mov    rcx, qword ptr [rdx + 8]
    mov    rdx, qword ptr [r8]
    mov    rax, qword ptr [r8 + 8]
    mov    qword ptr [rsp + 56], rcx
    mov    qword ptr [rsp + 48], r9
    mov    qword ptr [rsp + 40], rax
    mov    qword ptr [rsp + 32], rdx
    lea    rcx, [rsp + 48]
    lea    rdx, [rsp + 32]
    call    __udivti3
    pshufd    xmm1, xmm0, 78          # xmm1 = xmm0[2,3,0,1]
    movq    qword ptr [rsi], xmm0
    movq    qword ptr [rsi + 8], xmm1
    mov    rax, rsi
    add    rsp, 64
    pop    rsi
    ret
    .seh_handlerdata
    .text
    .seh_endproc
```

Noting that LLVM thinks __udivti3 looks like this when targeting Windows:

```
declare i128 @__udivti3(i128* readonly nocapture dereferenceable(16), i128*
readonly nocapture dereferenceable(16));
```

it’s easy to see that there’s way too much stack trashing going on, and the
assembly could instead be just this:

```
test3:
    push    rsi
    .seh_pushreg 6
    sub    rsp, 32
    .seh_stackalloc 32
    .seh_endprologue
    mov    rsi, rcx
    mov    rcx, rdx
    mov    rdx, r8
    call    __udivti3
    pshufd    xmm1, xmm0, 78          # xmm1 = xmm0[2,3,0,1]
    movq    qword ptr [rsi], xmm0
    movq    qword ptr [rsi + 8], xmm1
    mov    rax, rsi
    add    rsp, 32
    pop    rsi
    ret
    .seh_handlerdata
    .text
    .seh_endproc
```

which writes only to the sret parameter, as one would expect from
well-optimised code.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20161213/ffa1a8bd/attachment.html>


More information about the llvm-bugs mailing list