[llvm] afdcf21 - [X86] Pre-commit test for #61271

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 13 01:39:52 PDT 2023


Author: Phoebe Wang
Date: 2023-03-13T16:39:08+08:00
New Revision: afdcf219eafe5510e4d8e968711dc8f643d381e0

URL: https://github.com/llvm/llvm-project/commit/afdcf219eafe5510e4d8e968711dc8f643d381e0
DIFF: https://github.com/llvm/llvm-project/commit/afdcf219eafe5510e4d8e968711dc8f643d381e0.diff

LOG: [X86] Pre-commit test for #61271

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/half.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 14a1016fa9f4..2761d3b09994 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -1355,4 +1355,800 @@ define <8 x half> @shuffle(ptr %p) {
   ret <8 x half> %2
 }
 
+declare half @llvm.minnum.f16(half, half)
+
+define half @pr61271(half %0, half %1) #0 {
+; CHECK-LIBCALL-LABEL: pr61271:
+; CHECK-LIBCALL:       # %bb.0:
+; CHECK-LIBCALL-NEXT:    subq $40, %rsp
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, %xmm1
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    cmpltss %xmm2, %xmm1
+; CHECK-LIBCALL-NEXT:    andps %xmm1, %xmm0
+; CHECK-LIBCALL-NEXT:    andnps %xmm2, %xmm1
+; CHECK-LIBCALL-NEXT:    orps %xmm1, %xmm0
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    addq $40, %rsp
+; CHECK-LIBCALL-NEXT:    retq
+;
+; BWON-F16C-LABEL: pr61271:
+; BWON-F16C:       # %bb.0:
+; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
+; BWON-F16C-NEXT:    vpextrw $0, %xmm1, %ecx
+; BWON-F16C-NEXT:    movzwl %cx, %ecx
+; BWON-F16C-NEXT:    vmovd %ecx, %xmm0
+; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm1
+; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
+; BWON-F16C-NEXT:    vcmpltss %xmm0, %xmm1, %xmm2
+; BWON-F16C-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; BWON-F16C-NEXT:    vmovd %xmm0, %eax
+; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
+; BWON-F16C-NEXT:    retq
+;
+; CHECK-I686-LABEL: pr61271:
+; CHECK-I686:       # %bb.0:
+; CHECK-I686-NEXT:    subl $44, %esp
+; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
+; CHECK-I686-NEXT:    movw %ax, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
+; CHECK-I686-NEXT:    movw %ax, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movaps %xmm1, %xmm2
+; CHECK-I686-NEXT:    cmpltss %xmm0, %xmm2
+; CHECK-I686-NEXT:    andps %xmm2, %xmm1
+; CHECK-I686-NEXT:    andnps %xmm0, %xmm2
+; CHECK-I686-NEXT:    orps %xmm1, %xmm2
+; CHECK-I686-NEXT:    movss %xmm2, (%esp)
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    addl $44, %esp
+; CHECK-I686-NEXT:    retl
+  %3 = call fast half @llvm.minnum.f16(half %0, half %1)
+  ret half %3
+}
+
+declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
+
+define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
+; CHECK-LIBCALL-LABEL: maxnum_v8f16:
+; CHECK-LIBCALL:       # %bb.0:
+; CHECK-LIBCALL-NEXT:    subq $184, %rsp
+; CHECK-LIBCALL-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ja .LBB26_2
+; CHECK-LIBCALL-NEXT:  # %bb.1:
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_2:
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ja .LBB26_4
+; CHECK-LIBCALL-NEXT:  # %bb.3:
+; CHECK-LIBCALL-NEXT:    movss (%rsp), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_4:
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ja .LBB26_6
+; CHECK-LIBCALL-NEXT:  # %bb.5:
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_6:
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ja .LBB26_8
+; CHECK-LIBCALL-NEXT:  # %bb.7:
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_8:
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    psrlq $48, %xmm0
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    psrlq $48, %xmm0
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    movss (%rsp), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ja .LBB26_10
+; CHECK-LIBCALL-NEXT:  # %bb.9:
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_10:
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    ja .LBB26_12
+; CHECK-LIBCALL-NEXT:  # %bb.11:
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_12:
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    ja .LBB26_14
+; CHECK-LIBCALL-NEXT:  # %bb.13:
+; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_14:
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    psrld $16, %xmm0
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    psrld $16, %xmm0
+; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-LIBCALL-NEXT:    ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:    ja .LBB26_16
+; CHECK-LIBCALL-NEXT:  # %bb.15:
+; CHECK-LIBCALL-NEXT:    movd (%rsp), %xmm0 # 4-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-LIBCALL-NEXT:  .LBB26_16:
+; CHECK-LIBCALL-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-LIBCALL-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; CHECK-LIBCALL-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-LIBCALL-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
+; CHECK-LIBCALL-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-LIBCALL-NEXT:    addq $184, %rsp
+; CHECK-LIBCALL-NEXT:    retq
+;
+; BWON-F16C-LABEL: maxnum_v8f16:
+; BWON-F16C:       # %bb.0:
+; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; BWON-F16C-NEXT:    vpextrw $0, %xmm2, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm2
+; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
+; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; BWON-F16C-NEXT:    vpextrw $0, %xmm3, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm3
+; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
+; BWON-F16C-NEXT:    vucomiss %xmm2, %xmm3
+; BWON-F16C-NEXT:    ja .LBB26_2
+; BWON-F16C-NEXT:  # %bb.1:
+; BWON-F16C-NEXT:    vmovaps %xmm2, %xmm3
+; BWON-F16C-NEXT:  .LBB26_2:
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm2
+; BWON-F16C-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
+; BWON-F16C-NEXT:    vpextrw $0, %xmm3, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm3
+; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
+; BWON-F16C-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[3,3,3,3]
+; BWON-F16C-NEXT:    vpextrw $0, %xmm4, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm4
+; BWON-F16C-NEXT:    vcvtph2ps %xmm4, %xmm4
+; BWON-F16C-NEXT:    vucomiss %xmm3, %xmm4
+; BWON-F16C-NEXT:    ja .LBB26_4
+; BWON-F16C-NEXT:  # %bb.3:
+; BWON-F16C-NEXT:    vmovaps %xmm3, %xmm4
+; BWON-F16C-NEXT:  .LBB26_4:
+; BWON-F16C-NEXT:    vmovd %xmm2, %eax
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm4, %xmm2
+; BWON-F16C-NEXT:    vmovd %xmm2, %ecx
+; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; BWON-F16C-NEXT:    vpextrw $0, %xmm2, %edx
+; BWON-F16C-NEXT:    movzwl %dx, %edx
+; BWON-F16C-NEXT:    vmovd %edx, %xmm2
+; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
+; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; BWON-F16C-NEXT:    vpextrw $0, %xmm3, %edx
+; BWON-F16C-NEXT:    movzwl %dx, %edx
+; BWON-F16C-NEXT:    vmovd %edx, %xmm3
+; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
+; BWON-F16C-NEXT:    vucomiss %xmm2, %xmm3
+; BWON-F16C-NEXT:    ja .LBB26_6
+; BWON-F16C-NEXT:  # %bb.5:
+; BWON-F16C-NEXT:    vmovaps %xmm2, %xmm3
+; BWON-F16C-NEXT:  .LBB26_6:
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm2
+; BWON-F16C-NEXT:    vmovd %xmm2, %edx
+; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
+; BWON-F16C-NEXT:    vpextrw $0, %xmm2, %esi
+; BWON-F16C-NEXT:    movzwl %si, %esi
+; BWON-F16C-NEXT:    vmovd %esi, %xmm2
+; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
+; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
+; BWON-F16C-NEXT:    vpextrw $0, %xmm3, %esi
+; BWON-F16C-NEXT:    movzwl %si, %esi
+; BWON-F16C-NEXT:    vmovd %esi, %xmm3
+; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
+; BWON-F16C-NEXT:    vucomiss %xmm2, %xmm3
+; BWON-F16C-NEXT:    ja .LBB26_8
+; BWON-F16C-NEXT:  # %bb.7:
+; BWON-F16C-NEXT:    vmovaps %xmm2, %xmm3
+; BWON-F16C-NEXT:  .LBB26_8:
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm2
+; BWON-F16C-NEXT:    vmovd %xmm2, %esi
+; BWON-F16C-NEXT:    vpsrlq $48, %xmm1, %xmm2
+; BWON-F16C-NEXT:    vpextrw $0, %xmm2, %edi
+; BWON-F16C-NEXT:    movzwl %di, %edi
+; BWON-F16C-NEXT:    vmovd %edi, %xmm2
+; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
+; BWON-F16C-NEXT:    vpsrlq $48, %xmm0, %xmm3
+; BWON-F16C-NEXT:    vpextrw $0, %xmm3, %edi
+; BWON-F16C-NEXT:    movzwl %di, %edi
+; BWON-F16C-NEXT:    vmovd %edi, %xmm3
+; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm6
+; BWON-F16C-NEXT:    vucomiss %xmm2, %xmm6
+; BWON-F16C-NEXT:    ja .LBB26_10
+; BWON-F16C-NEXT:  # %bb.9:
+; BWON-F16C-NEXT:    vmovaps %xmm2, %xmm6
+; BWON-F16C-NEXT:  .LBB26_10:
+; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm2
+; BWON-F16C-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm3
+; BWON-F16C-NEXT:    vpinsrw $0, %edx, %xmm0, %xmm4
+; BWON-F16C-NEXT:    vpinsrw $0, %esi, %xmm0, %xmm5
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm6, %xmm6
+; BWON-F16C-NEXT:    vmovd %xmm6, %eax
+; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3]
+; BWON-F16C-NEXT:    vpextrw $0, %xmm6, %ecx
+; BWON-F16C-NEXT:    movzwl %cx, %ecx
+; BWON-F16C-NEXT:    vmovd %ecx, %xmm6
+; BWON-F16C-NEXT:    vcvtph2ps %xmm6, %xmm6
+; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
+; BWON-F16C-NEXT:    vpextrw $0, %xmm7, %ecx
+; BWON-F16C-NEXT:    movzwl %cx, %ecx
+; BWON-F16C-NEXT:    vmovd %ecx, %xmm7
+; BWON-F16C-NEXT:    vcvtph2ps %xmm7, %xmm7
+; BWON-F16C-NEXT:    vucomiss %xmm6, %xmm7
+; BWON-F16C-NEXT:    ja .LBB26_12
+; BWON-F16C-NEXT:  # %bb.11:
+; BWON-F16C-NEXT:    vmovaps %xmm6, %xmm7
+; BWON-F16C-NEXT:  .LBB26_12:
+; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm4
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm7, %xmm5
+; BWON-F16C-NEXT:    vmovd %xmm5, %eax
+; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm5
+; BWON-F16C-NEXT:    vpextrw $0, %xmm1, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm6
+; BWON-F16C-NEXT:    vcvtph2ps %xmm6, %xmm6
+; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm7
+; BWON-F16C-NEXT:    vcvtph2ps %xmm7, %xmm7
+; BWON-F16C-NEXT:    vucomiss %xmm6, %xmm7
+; BWON-F16C-NEXT:    ja .LBB26_14
+; BWON-F16C-NEXT:  # %bb.13:
+; BWON-F16C-NEXT:    vmovaps %xmm6, %xmm7
+; BWON-F16C-NEXT:  .LBB26_14:
+; BWON-F16C-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm7, %xmm4
+; BWON-F16C-NEXT:    vmovd %xmm4, %eax
+; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm4
+; BWON-F16C-NEXT:    vpsrld $16, %xmm1, %xmm1
+; BWON-F16C-NEXT:    vpextrw $0, %xmm1, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm1
+; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
+; BWON-F16C-NEXT:    vpsrld $16, %xmm0, %xmm0
+; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
+; BWON-F16C-NEXT:    movzwl %ax, %eax
+; BWON-F16C-NEXT:    vmovd %eax, %xmm0
+; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
+; BWON-F16C-NEXT:    ja .LBB26_16
+; BWON-F16C-NEXT:  # %bb.15:
+; BWON-F16C-NEXT:    vmovaps %xmm1, %xmm0
+; BWON-F16C-NEXT:  .LBB26_16:
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; BWON-F16C-NEXT:    vmovd %xmm0, %eax
+; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
+; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; BWON-F16C-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; BWON-F16C-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; BWON-F16C-NEXT:    retq
+;
+; CHECK-I686-LABEL: maxnum_v8f16:
+; CHECK-I686:       # %bb.0:
+; CHECK-I686-NEXT:    pushl %ebx
+; CHECK-I686-NEXT:    pushl %edi
+; CHECK-I686-NEXT:    pushl %esi
+; CHECK-I686-NEXT:    subl $336, %esp # imm = 0x150
+; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movaps %xmm1, %xmm0
+; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1]
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_1
+; CHECK-I686-NEXT:  # %bb.2:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_3
+; CHECK-I686-NEXT:  .LBB26_1:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_3:
+; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm0, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_4
+; CHECK-I686-NEXT:  # %bb.5:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_6
+; CHECK-I686-NEXT:  .LBB26_4:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_6:
+; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm0, (%esp)
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrlq $48, %xmm0
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrlq $48, %xmm0
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_7
+; CHECK-I686-NEXT:  # %bb.8:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_9
+; CHECK-I686-NEXT:  .LBB26_7:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_9:
+; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm0, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_10
+; CHECK-I686-NEXT:  # %bb.11:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_12
+; CHECK-I686-NEXT:  .LBB26_10:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_12:
+; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm0, (%esp)
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_13
+; CHECK-I686-NEXT:  # %bb.14:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_15
+; CHECK-I686-NEXT:  .LBB26_13:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_15:
+; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm0, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_16
+; CHECK-I686-NEXT:  # %bb.17:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_18
+; CHECK-I686-NEXT:  .LBB26_16:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_18:
+; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm0, (%esp)
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrld $16, %xmm0
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrld $16, %xmm0
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %edi
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ebx
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %di, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; CHECK-I686-NEXT:    movw %bx, (%esp)
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movw %si, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_19
+; CHECK-I686-NEXT:  # %bb.20:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_21
+; CHECK-I686-NEXT:  .LBB26_19:
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_21:
+; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-I686-NEXT:    calll __extendhfsf2
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm0, (%esp)
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT:    ja .LBB26_22
+; CHECK-I686-NEXT:  # %bb.23:
+; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    jmp .LBB26_24
+; CHECK-I686-NEXT:  .LBB26_22:
+; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:  .LBB26_24:
+; CHECK-I686-NEXT:    movd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
+; CHECK-I686-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; CHECK-I686-NEXT:    movss %xmm1, (%esp)
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-I686-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-I686-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-I686-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-I686-NEXT:    punpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-I686-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-I686-NEXT:    calll __truncsfhf2
+; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-I686-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; CHECK-I686-NEXT:    punpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-I686-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-I686-NEXT:    punpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-I686-NEXT:    # xmm1 = xmm1[0],mem[0]
+; CHECK-I686-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-I686-NEXT:    addl $336, %esp # imm = 0x150
+; CHECK-I686-NEXT:    popl %esi
+; CHECK-I686-NEXT:    popl %edi
+; CHECK-I686-NEXT:    popl %ebx
+; CHECK-I686-NEXT:    retl
+  %3 = call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %0, <8 x half> %1)
+  ret <8 x half> %3
+}
+
 attributes #0 = { nounwind }


        


More information about the llvm-commits mailing list