[LLVMbugs] [Bug 17002] New: LLVM produces poor code for 8-vectors split into two 4-vectors when using select.

Mon Aug 26 07:34:05 PDT 2013

http://llvm.org/bugs/show_bug.cgi?id=17002

            Bug ID: 17002
           Summary: LLVM produces poor code for 8-vectors split into two
                    4-vectors when using select.
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: normal
          Priority: P
         Component: Common Code Generator Code
          Assignee: unassignedbugs at nondot.org
          Reporter: nick at indigorenderer.com
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified

I'm on a non-AVX box, and am trying to write a min() function that produces the
element-wise minimum of two 8-element float vectors.
The select (the fcmp olt + select pair in the IR below) gets turned into poor
code.  (See code below.)
Ideally, the generated code would be two loads, two MINPS instructions, and
then two stores.
Instead we get a lot of shuffling and shifting.
The problem seems to be caused by the splitting of the select node in the
selection DAG.
This happens with LLVM trunk, on Windows x64.

Optimised IR:

; ModuleID = 'WinterModule'
target datalayout =
"e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Function Attrs: nounwind
define void @"main(Float8Struct, Float8Struct)"({ <8 x float> }* noalias
nocapture sret %ret, { <8 x float> }* noalias nocapture readonly %a, { <8 x
float> }* noalias nocapture readonly %b, i32* nocapture readnone %hidden) #0 {
entry:
  %a.idx = getelementptr { <8 x float> }* %a, i64 0, i32 0
  %a.idx.val = load <8 x float>* %a.idx, align 32
  %b.idx = getelementptr { <8 x float> }* %b, i64 0, i32 0
  %b.idx.val = load <8 x float>* %b.idx, align 32
  %0 = fcmp olt <8 x float> %a.idx.val, %b.idx.val
  %1 = select <8 x i1> %0, <8 x float> %a.idx.val, <8 x float> %b.idx.val
  %.fca.0.insert.i = insertvalue { <8 x float> } undef, <8 x float> %1, 0
  store { <8 x float> } %.fca.0.insert.i, { <8 x float> }* %ret, align 32
  ret void
}

attributes #0 = { nounwind }

Resulting code:

Function Live Ins: %RCX in %vreg0, %RDX in %vreg1, %R8 in %vreg2

BB#0: derived from LLVM BB %entry
    Live Ins: %RCX %RDX %R8
        %XMM2<def> = MOVAPSrm %R8, 1, %noreg, 0, %noreg;
mem:LD16[%b.idx2](align=32)
        %XMM3<def> = MOVAPSrm %R8<kill>, 1, %noreg, 16, %noreg;
mem:LD16[%b.idx2(align=32)+16](align=16)
        %XMM4<def> = MOVAPSrm %RDX, 1, %noreg, 0, %noreg;
mem:LD16[%a.idx1](align=32)
        %XMM5<def> = MOVAPSrm %RDX<kill>, 1, %noreg, 16, %noreg;
mem:LD16[%a.idx1(align=32)+16](align=16)
        %XMM0<def> = MOVAPSrr %XMM5
        %XMM0<def,tied1> = CMPPSrri %XMM0<kill,tied0>, %XMM3, 1
        %RAX<def> = MOV64ri <cp#0>
        %XMM0<def,tied1> = PSHUFBrm %XMM0<kill,tied0>, %RAX<kill>, 1, %noreg,
0, %noreg; mem:LD16[ConstantPool]
        %XMM1<def> = MOVAPSrr %XMM4
        %XMM1<def,tied1> = CMPPSrri %XMM1<kill,tied0>, %XMM2, 1
        %RAX<def> = MOV64ri <cp#1>
        %XMM1<def,tied1> = PSHUFBrm %XMM1<kill,tied0>, %RAX<kill>, 1, %noreg,
0, %noreg; mem:LD16[ConstantPool]
        %XMM1<def,tied1> = PORrr %XMM1<kill,tied0>, %XMM0<kill>
        %XMM1<def,tied1> = PSLLWri %XMM1<kill,tied0>, 15
        %XMM1<def,tied1> = PSRAWri %XMM1<kill,tied0>, 15
        %XMM0<def> = MOVDQArr %XMM1
        %XMM0<def,tied1> = PUNPCKHBWrr %XMM0<kill,tied0>, %XMM0<undef>
        %XMM0<def,tied1> = PSLLDri %XMM0<kill,tied0>, 31
        %XMM3<def,tied1> = BLENDVPSrr0 %XMM3<kill,tied0>, %XMM5<kill>,
%XMM0<imp-use>
        MOVAPSmr %RCX, 1, %noreg, 16, %noreg, %XMM3<kill>;
mem:ST16[%ret(align=32)+16](align=16)
        %XMM1<def,tied1> = PUNPCKLBWrr %XMM1<kill,tied0>, %XMM0<undef>
        %XMM1<def,tied1> = PSLLDri %XMM1<kill,tied0>, 31
        %XMM0<def> = MOVDQArr %XMM1<kill>
        %XMM2<def,tied1> = BLENDVPSrr0 %XMM2<kill,tied0>, %XMM4<kill>,
%XMM0<imp-use>
        MOVAPSmr %RCX, 1, %noreg, 0, %noreg, %XMM2<kill>;
mem:ST16[%ret](align=32)
        %RAX<def> = MOV64rr %RCX<kill>
        RET %RAX

# End machine code for function main(Float8Struct, Float8Struct).

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20130826/e6c43454/attachment.html>