[llvm] [SelectionDAG] Lower llvm.ldexp.f32 to ldexp() on Windows. (PR #95301)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 12 13:26:55 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Eli Friedman (efriedma-quic)
<details>
<summary>Changes</summary>
This reduces code size, as discussed in #<!-- -->92707.
---
Patch is 34.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95301.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+21)
- (modified) llvm/test/CodeGen/X86/ldexp.ll (+275-594)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 8cd2bb60d81f2..1d9f2fe65e6fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3650,6 +3650,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// FIXME: Use separate LibCall action.
if (TLI.getLibcallName(LC))
break;
+ if (Node->getOpcode() == ISD::FLDEXP && VT == MVT::f32 &&
+ TLI.isTypeLegal(MVT::f64) &&
+ TLI.getLibcallName(RTLIB::getLDEXP(MVT::f64))) {
+ // On Windows, it's common to be missing the 32-bit libcall, but have
+ // the 64-bit libcall. Expand to the 64-bit libcall. (Note that ldexp
+ // involves a rounding step if the result is subnormal, but that isn't
+ // relevant here because any subnormal result will round to zero when
+ // it's truncated.)
+ //
+ // FIXME: Consider doing something similar for f16/bf16. But be very
+ // careful handling bf16: expanding bf16->f64 is fine, but expanding
+ // bf16->f32 would produce incorrect subnormal results.
+ SDValue Extended =
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Node->getOperand(0));
+ SDValue LdExp =
+ DAG.getNode(ISD::FLDEXP, dl, MVT::f64, Extended, Node->getOperand(1));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, VT, LdExp,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ }
if (SDValue Expanded = expandLdexp(Node)) {
Results.push_back(Expanded);
diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll
index 2be5dec156690..d3b02dc8f9b7c 100644
--- a/llvm/test/CodeGen/X86/ldexp.ll
+++ b/llvm/test/CodeGen/X86/ldexp.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=X64 %s
+; RUN: llc -mtriple=x86_64-pc-win32 -verify-machineinstrs < %s | FileCheck -check-prefixes=WIN64 %s
; RUN: llc -mtriple=i386-pc-win32 -verify-machineinstrs < %s | FileCheck -check-prefix=WIN32 %s
define float @ldexp_f32(i8 zeroext %x) {
@@ -8,75 +9,30 @@ define float @ldexp_f32(i8 zeroext %x) {
; X64-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-NEXT: jmp ldexpf@PLT # TAILCALL
;
+; WIN64-LABEL: ldexp_f32:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $40, %rsp
+; WIN64-NEXT: .seh_stackalloc 40
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: movzbl %cl, %edx
+; WIN64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm0
+; WIN64-NEXT: addq $40, %rsp
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
; WIN32-LABEL: ldexp_f32:
; WIN32: # %bb.0:
-; WIN32-NEXT: pushl %eax
-; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: cmpl $381, %ecx # imm = 0x17D
-; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: jl LBB0_2
-; WIN32-NEXT: # %bb.1:
-; WIN32-NEXT: movl $381, %eax # imm = 0x17D
-; WIN32-NEXT: LBB0_2:
-; WIN32-NEXT: addl $-254, %eax
-; WIN32-NEXT: leal -127(%ecx), %edx
-; WIN32-NEXT: cmpl $255, %ecx
-; WIN32-NEXT: jae LBB0_4
-; WIN32-NEXT: # %bb.3:
-; WIN32-NEXT: movl %edx, %eax
-; WIN32-NEXT: LBB0_4:
-; WIN32-NEXT: flds __real@7f800000
-; WIN32-NEXT: flds __real@7f000000
-; WIN32-NEXT: jae LBB0_6
-; WIN32-NEXT: # %bb.5:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB0_6:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $-329, %ecx # imm = 0xFEB7
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: jge LBB0_8
-; WIN32-NEXT: # %bb.7:
-; WIN32-NEXT: movl $-330, %edx # imm = 0xFEB6
-; WIN32-NEXT: LBB0_8:
-; WIN32-NEXT: cmpl $-228, %ecx
-; WIN32-NEXT: fldz
-; WIN32-NEXT: flds __real@0c800000
-; WIN32-NEXT: jb LBB0_9
-; WIN32-NEXT: # %bb.10:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: leal 102(%ecx), %edx
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jge LBB0_12
-; WIN32-NEXT: jmp LBB0_13
-; WIN32-NEXT: LBB0_9:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: addl $204, %edx
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jl LBB0_13
-; WIN32-NEXT: LBB0_12:
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: LBB0_13:
+; WIN32-NEXT: subl $16, %esp
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: fld1
-; WIN32-NEXT: jl LBB0_15
-; WIN32-NEXT: # %bb.14:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB0_15:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $127, %ecx
-; WIN32-NEXT: jg LBB0_17
-; WIN32-NEXT: # %bb.16:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: movl %edx, %eax
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB0_17:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: shll $23, %eax
-; WIN32-NEXT: addl $1065353216, %eax # imm = 0x3F800000
-; WIN32-NEXT: movl %eax, (%esp)
-; WIN32-NEXT: fmuls (%esp)
-; WIN32-NEXT: popl %eax
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstps {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
%zext = zext i8 %x to i32
%ldexp = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 %zext)
@@ -89,6 +45,12 @@ define double @ldexp_f64(i8 zeroext %x) {
; X64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-NEXT: jmp ldexp@PLT # TAILCALL
;
+; WIN64-LABEL: ldexp_f64:
+; WIN64: # %bb.0:
+; WIN64-NEXT: movzbl %cl, %edx
+; WIN64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; WIN64-NEXT: jmp ldexp # TAILCALL
+;
; WIN32-LABEL: ldexp_f64:
; WIN32: # %bb.0:
; WIN32-NEXT: subl $12, %esp
@@ -127,152 +89,60 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
+; WIN64-LABEL: ldexp_v2f32:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rsi
+; WIN64-NEXT: .seh_pushreg %rsi
+; WIN64-NEXT: subq $64, %rsp
+; WIN64-NEXT: .seh_stackalloc 64
+; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm7, 48
+; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm6, 32
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: movaps (%rcx), %xmm7
+; WIN64-NEXT: movl (%rdx), %eax
+; WIN64-NEXT: movl 4(%rdx), %esi
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: movl %eax, %edx
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm6, %xmm6
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm6
+; WIN64-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,1,1,1]
+; WIN64-NEXT: xorps %xmm0, %xmm0
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: movl %esi, %edx
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm0
+; WIN64-NEXT: unpcklps {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
+; WIN64-NEXT: movaps %xmm6, %xmm0
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN64-NEXT: addq $64, %rsp
+; WIN64-NEXT: popq %rsi
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
; WIN32-LABEL: ldexp_v2f32:
; WIN32: # %bb.0:
-; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $8, %esp
-; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: subl $20, %esp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: cmpl $-329, %eax # imm = 0xFEB7
-; WIN32-NEXT: movl %eax, %edx
-; WIN32-NEXT: jge LBB2_2
-; WIN32-NEXT: # %bb.1:
-; WIN32-NEXT: movl $-330, %edx # imm = 0xFEB6
-; WIN32-NEXT: LBB2_2:
-; WIN32-NEXT: addl $204, %edx
-; WIN32-NEXT: leal 102(%eax), %ecx
-; WIN32-NEXT: cmpl $-228, %eax
-; WIN32-NEXT: jb LBB2_4
-; WIN32-NEXT: # %bb.3:
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: LBB2_4:
-; WIN32-NEXT: flds __real@0c800000
-; WIN32-NEXT: fld %st(1)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: jb LBB2_6
-; WIN32-NEXT: # %bb.5:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB2_6:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: cmpl $-126, %eax
-; WIN32-NEXT: jl LBB2_8
-; WIN32-NEXT: # %bb.7:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fld %st(1)
-; WIN32-NEXT: movl %eax, %edx
-; WIN32-NEXT: LBB2_8:
-; WIN32-NEXT: cmpl $381, %eax # imm = 0x17D
-; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: jl LBB2_10
-; WIN32-NEXT: # %bb.9:
-; WIN32-NEXT: movl $381, %esi # imm = 0x17D
-; WIN32-NEXT: LBB2_10:
-; WIN32-NEXT: flds __real@7f000000
-; WIN32-NEXT: fmul %st, %st(3)
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: leal -127(%eax), %ecx
-; WIN32-NEXT: cmpl $255, %eax
-; WIN32-NEXT: jae LBB2_11
-; WIN32-NEXT: # %bb.12:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: jmp LBB2_13
-; WIN32-NEXT: LBB2_11:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: addl $-254, %esi
-; WIN32-NEXT: movl %esi, %ecx
-; WIN32-NEXT: LBB2_13:
-; WIN32-NEXT: cmpl $127, %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: jg LBB2_15
-; WIN32-NEXT: # %bb.14:
-; WIN32-NEXT: movl %edx, %ecx
-; WIN32-NEXT: LBB2_15:
-; WIN32-NEXT: cmpl $381, %esi # imm = 0x17D
-; WIN32-NEXT: movl %esi, %edx
-; WIN32-NEXT: jl LBB2_17
-; WIN32-NEXT: # %bb.16:
-; WIN32-NEXT: movl $381, %edx # imm = 0x17D
-; WIN32-NEXT: LBB2_17:
-; WIN32-NEXT: addl $-254, %edx
-; WIN32-NEXT: leal -127(%esi), %edi
-; WIN32-NEXT: cmpl $255, %esi
-; WIN32-NEXT: jae LBB2_19
-; WIN32-NEXT: # %bb.18:
-; WIN32-NEXT: movl %edi, %edx
-; WIN32-NEXT: LBB2_19:
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: fmul %st, %st(2)
-; WIN32-NEXT: jae LBB2_21
-; WIN32-NEXT: # %bb.20:
-; WIN32-NEXT: fstp %st(2)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB2_21:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $-329, %esi # imm = 0xFEB7
-; WIN32-NEXT: movl %esi, %edi
-; WIN32-NEXT: jge LBB2_23
-; WIN32-NEXT: # %bb.22:
-; WIN32-NEXT: movl $-330, %edi # imm = 0xFEB6
-; WIN32-NEXT: LBB2_23:
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(4), %st
-; WIN32-NEXT: fmul %st, %st(4)
-; WIN32-NEXT: cmpl $-228, %esi
-; WIN32-NEXT: jb LBB2_24
-; WIN32-NEXT: # %bb.25:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: leal 102(%esi), %edi
-; WIN32-NEXT: cmpl $-126, %esi
-; WIN32-NEXT: jge LBB2_27
-; WIN32-NEXT: jmp LBB2_28
-; WIN32-NEXT: LBB2_24:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: addl $204, %edi
-; WIN32-NEXT: cmpl $-126, %esi
-; WIN32-NEXT: jl LBB2_28
-; WIN32-NEXT: LBB2_27:
-; WIN32-NEXT: fstp %st(3)
-; WIN32-NEXT: movl %esi, %edi
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB2_28:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: cmpl $127, %esi
-; WIN32-NEXT: jg LBB2_30
-; WIN32-NEXT: # %bb.29:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: movl %edi, %edx
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(2)
-; WIN32-NEXT: LBB2_30:
-; WIN32-NEXT: fstp %st(2)
-; WIN32-NEXT: cmpl $127, %eax
-; WIN32-NEXT: jg LBB2_32
-; WIN32-NEXT: # %bb.31:
-; WIN32-NEXT: fstp %st(2)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: LBB2_32:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: shll $23, %ecx
-; WIN32-NEXT: addl $1065353216, %ecx # imm = 0x3F800000
-; WIN32-NEXT: movl %ecx, (%esp)
-; WIN32-NEXT: shll $23, %edx
-; WIN32-NEXT: addl $1065353216, %edx # imm = 0x3F800000
-; WIN32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: fmuls (%esp)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: fmuls {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl $8, %esp
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: fstps {{[0-9]+}}(%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstps {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: addl $20, %esp
; WIN32-NEXT: popl %esi
-; WIN32-NEXT: popl %edi
; WIN32-NEXT: retl
%1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp)
ret <2 x float> %1
@@ -319,335 +189,106 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
+; WIN64-LABEL: ldexp_v4f32:
+; WIN64: # %bb.0:
+; WIN64-NEXT: pushq %rsi
+; WIN64-NEXT: .seh_pushreg %rsi
+; WIN64-NEXT: subq $80, %rsp
+; WIN64-NEXT: .seh_stackalloc 80
+; WIN64-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm8, 64
+; WIN64-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm7, 48
+; WIN64-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; WIN64-NEXT: .seh_savexmm %xmm6, 32
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: movq %rdx, %rsi
+; WIN64-NEXT: movaps (%rcx), %xmm7
+; WIN64-NEXT: movl 12(%rdx), %edx
+; WIN64-NEXT: movaps %xmm7, %xmm0
+; WIN64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3],xmm7[3,3]
+; WIN64-NEXT: cvtss2sd %xmm0, %xmm0
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm6, %xmm6
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm6
+; WIN64-NEXT: movl 8(%rsi), %edx
+; WIN64-NEXT: movaps %xmm7, %xmm0
+; WIN64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm7[1]
+; WIN64-NEXT: cvtss2sd %xmm0, %xmm0
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm8, %xmm8
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm8
+; WIN64-NEXT: unpcklps {{.*#+}} xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
+; WIN64-NEXT: movl (%rsi), %edx
+; WIN64-NEXT: movl 4(%rsi), %esi
+; WIN64-NEXT: xorps %xmm0, %xmm0
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: xorps %xmm6, %xmm6
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm6
+; WIN64-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,1,1,1]
+; WIN64-NEXT: xorps %xmm0, %xmm0
+; WIN64-NEXT: cvtss2sd %xmm7, %xmm0
+; WIN64-NEXT: movl %esi, %edx
+; WIN64-NEXT: callq ldexp
+; WIN64-NEXT: cvtsd2ss %xmm0, %xmm0
+; WIN64-NEXT: unpcklps {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
+; WIN64-NEXT: movlhps {{.*#+}} xmm6 = xmm6[0],xmm8[0]
+; WIN64-NEXT: movaps %xmm6, %xmm0
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
+; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
+; WIN64-NEXT: addq $80, %rsp
+; WIN64-NEXT: popq %rsi
+; WIN64-NEXT: retq
+; WIN64-NEXT: .seh_endproc
+;
; WIN32-LABEL: ldexp_v4f32:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $32, %esp
+; WIN32-NEXT: subl $44, %esp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %ebp, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: flds __real@7f000000
-; WIN32-NEXT: fld %st(1)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: cmpl $255, %ecx
-; WIN32-NEXT: jae LBB3_2
-; WIN32-NEXT: # %bb.1:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_2:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: cmpl $-329, %ecx # imm = 0xFEB7
-; WIN32-NEXT: movl %ecx, %esi
-; WIN32-NEXT: jge LBB3_4
-; WIN32-NEXT: # %bb.3:
-; WIN32-NEXT: movl $-330, %esi # imm = 0xFEB6
-; WIN32-NEXT: LBB3_4:
-; WIN32-NEXT: addl $204, %esi
-; WIN32-NEXT: leal 102(%ecx), %eax
-; WIN32-NEXT: cmpl $-228, %ecx
-; WIN32-NEXT: jb LBB3_6
-; WIN32-NEXT: # %bb.5:
-; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: LBB3_6:
-; WIN32-NEXT: flds __real@0c800000
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(1), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: jb LBB3_8
-; WIN32-NEXT: # %bb.7:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_8:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jl LBB3_10
-; WIN32-NEXT: # %bb.9:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(4)
-; WIN32-NEXT: LBB3_10:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN32-NEXT: movl %ecx, %edx
-; WIN32-NEXT: subl $127, %edx
-; WIN32-NEXT: jg LBB3_12
-; WIN32-NEXT: # %bb.11:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(3)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_12:
-; WIN32-NEXT: fstp %st(3)
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(2), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(3), %st
-; WIN32-NEXT: cmpl $255, %edi
-; WIN32-NEXT: jae LBB3_14
-; WIN32-NEXT: # %bb.13:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: LBB3_14:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: fxch %st(1)
-; WIN32-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; WIN32-NEXT: cmpl $-329, %edi # imm = 0xFEB7
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: jge LBB3_16
-; WIN32-NEXT: # %bb.15:
-; WIN32-NEXT: movl $-330, %eax # imm = 0xFEB6
-; WIN32-NEXT: LBB3_16:
-; WIN32-NEXT: fld %st(3)
-; WIN32-NEXT: fmul %st(3), %st
-; WIN32-NEXT: fld %st(0)
-; WIN32-NEXT: fmul %st(4), %st
-; WIN32-NEXT: cmpl $-228, %edi
-; WIN32-NEXT: jb LBB3_17
-; WIN32-NEXT: # %bb.18:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: leal 102(%edi), %eax
-; WIN32-NEXT: cmpl $-126, %edi
-; WIN32-NEXT: jge LBB3_20
-; WIN32-NEXT: jmp LBB3_21
-; WIN32-NEXT: LBB3_17:
-; WIN32-NEXT: fstp %st(1)
-; WIN32-NEXT: addl $204, %eax
-; WIN32-NEXT: cmpl $-126, %edi
-; WIN32-NEXT: jl LBB3_21
-; WIN32-NEXT: LBB3_20:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(4)
-; WIN32-NEXT: LBB3_21:
-; WIN32-NEXT: fstp %st(4)
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: movl %edi, %ebx
-; WIN32-NEXT: subl $127, %ebx
-; WIN32-NEXT: jg LBB3_23
-; WIN32-NEXT: # %bb.22:
-; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: fldz
-; WIN32-NEXT: fxch %st(3)
-; WIN32-NEXT: LBB3_23:
-; WIN32-NEXT: fstp %st(3)
-; WIN32-NEXT: cmpl $381, %edi # imm = 0x17D
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: jge LBB3_24
-; WIN32-NEXT: # %bb.25:
-; WIN32-NEXT: cmpl $255, %edi
-; WIN32-NEXT: jae LBB3_26
-; WIN32-NEXT: LBB3_27:
-; WIN32-NEXT: cmpl $-126, %ecx
-; WIN32-NEXT: jl LBB3_29
-; WIN32-NEXT: LBB3_28:
-; WIN32-NEXT: movl %ecx, %esi
-; WIN32-NEXT: LBB3_29:
-; WIN32-NEXT: cmpl $381, %ecx # imm = 0x17D
-; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: jl LBB3_31
-; WIN32-NEXT: # %bb.30:
-; WIN32-NEXT: movl $381, %eax # imm = 0x17D
-; WIN32-NEXT: LBB3_31:
-; WIN32-NEXT: cmpl $255, %ecx
+; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: calll _ldexp
+; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; WIN32-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: jb LBB3_33
-; WIN32-NEXT: # %bb.32:
-; WIN32-NEXT: addl $-254, %eax
-; WIN32-NEXT: movl %eax, %edx
-; WIN32-NEXT: LBB3_33:
-; WIN32-NEXT: fxc...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/95301
More information about the llvm-commits
mailing list