[PATCH] Propagate DAG node ordering during legalization and instruction selection

Fri Mar 15 10:41:22 PDT 2013

As far as I can tell, this change is just reversing the order of the
__ftol2 calls.  The end result is the elimination of the fxch at the
expense of an additional spill, causing the test to fail.  I'm not too
familiar with the x87 FP stack, but this still looks correct.  Does anyone
see any actual issues with the modified output (from original post)?  If
not, I'll just adjust the testcase.

Diff of Original -> Patched

 # After Instruction Selection:
 # Machine code for function double_ui64_2: SSA
 Frame Objects:
   fi#-3: size=8, align=4, fixed, at location [SP+20]
   fi#-2: size=8, align=4, fixed, at location [SP+12]
   fi#-1: size=8, align=4, fixed, at location [SP+4]
   fi#0: size=8, align=8, at location [SP+4]
   fi#1: size=8, align=8, at location [SP+4]

 BB#0: derived from LLVM BB %0
  %vreg2<def> = LD_Fp64m <fi#-3>, 1, %noreg, 0, %noreg,
%FPSW<imp-def,dead>; mem:LD8[FixedStack-3] RFP64:%vreg2
  %vreg1<def> = LD_Fp64m <fi#-2>, 1, %noreg, 0, %noreg,
%FPSW<imp-def,dead>; mem:LD8[FixedStack-2] RFP64:%vreg1
  %vreg0<def> = LD_Fp64m <fi#-1>, 1, %noreg, 0, %noreg,
%FPSW<imp-def,dead>; mem:LD8[FixedStack-1] RFP64:%vreg0
  %vreg3<def> = DIV_Fp64 %vreg0, %vreg1, %FPSW<imp-def,dead>;
RFP64:%vreg3,%vreg0,%vreg1
  %vreg4<def> = SUB_Fp64 %vreg0, %vreg2, %FPSW<imp-def,dead>;
RFP64:%vreg4,%vreg0,%vreg2
- WIN_FTOL_64 %vreg4<kill>, %EAX<imp-def>, %EDX<imp-def>,
%EFLAGS<imp-def,dead>; RFP64:%vreg4
+ WIN_FTOL_64 %vreg3<kill>, %EAX<imp-def>, %EDX<imp-def>,
%EFLAGS<imp-def,dead>; RFP64:%vreg3
  %vreg5<def> = COPY %EAX; GR32:%vreg5
  %vreg6<def> = COPY %EDX; GR32:%vreg6
- WIN_FTOL_64 %vreg3<kill>, %EAX<imp-def>, %EDX<imp-def>,
%EFLAGS<imp-def,dead>; RFP64:%vreg3
+ WIN_FTOL_64 %vreg4<kill>, %EAX<imp-def>, %EDX<imp-def>,
%EFLAGS<imp-def,dead>; RFP64:%vreg4
  %vreg7<def> = COPY %EAX; GR32:%vreg7
  %vreg8<def> = COPY %EDX; GR32:%vreg8
- %vreg9<def,tied1> = SUB32rr %vreg7<tied0>, %vreg5, %EFLAGS<imp-def>;
GR32:%vreg9,%vreg7,%vreg5
- %vreg10<def,tied1> = SBB32rr %vreg8<tied0>, %vreg6,
%EFLAGS<imp-def,dead>, %EFLAGS<imp-use>; GR32:%vreg10,%vreg8,%vreg6
+ %vreg9<def,tied1> = SUB32rr %vreg5<tied0>, %vreg7, %EFLAGS<imp-def>;
GR32:%vreg9,%vreg5,%vreg7
+ %vreg10<def,tied1> = SBB32rr %vreg6<tied0>, %vreg8,
%EFLAGS<imp-def,dead>, %EFLAGS<imp-use>; GR32:%vreg10,%vreg6,%vreg8
  %EAX<def> = COPY %vreg9; GR32:%vreg9
  %EDX<def> = COPY %vreg10; GR32:%vreg10
  RET %EAX, %EDX

 # End machine code for function double_ui64_2.

On Fri, Mar 15, 2013 at 12:31 PM, Jakob Stoklund Olesen <stoklund at 2pi.dk>wrote:

> Hi Justin,
>
> The x87 stack is always a bit strange. It's probably easier to compare the
> outputs of -print-machineinstrs.
>
> /jakob
>
> On Mar 15, 2013, at 7:54 AM, Justin Holewinski <
> justin.holewinski at gmail.com> wrote:
>
> The attached patch modifies DAGTypeLegalizer and SelectionDAGISel to
> propagate DAG node ordering.  We currently (for better or for worse) use
> source scheduling, and we're being hit by issues in the scheduler due to
> the ordering being lost.
>
>
> One issue with this patch is that is causes one X86 test to fail: win_ftol2
>
> The only meaningful difference I can spot (other than an extra spill) is
> that the 'fxch' instruction is eliminated.  Can anyone more familiar with
> the X86 back-end comment on why DAG node ordering would affect this test?
>  I've narrowed the issue down to being caused by this patch's change
> to DAGTypeLegalizer::SetExpandedInteger. (e.g. if this method propagates
> DAG node ordering in the newly created nodes, this test starts to fail).
>
>
> Current Output:
>
> _double_ui64_2:                         # @double_ui64_2
> # BB#0:
> pushl %ebp
> movl %esp, %ebp
> pushl %esi
> andl $-8, %esp
> subl $32, %esp
> fldl 24(%ebp)
> fldl 16(%ebp)
> fldl 8(%ebp)
> fdiv %st(0), %st(1)
> fsubp %st(2)
> fxch
> calll __ftol2
> movl %eax, 4(%esp)           # 4-byte Spill
> movl %edx, (%esp)            # 4-byte Spill
> calll __ftol2
> movl 4(%esp), %ecx           # 4-byte Reload
> subl %ecx, %eax
> movl (%esp), %esi            # 4-byte Reload
> sbbl %esi, %edx
> leal -4(%ebp), %esp
> popl %esi
> popl %ebp
> ret
>
>
> Output With Patch:
>
> _double_ui64_2:                         # @double_ui64_2
> # BB#0:
> pushl %ebp
> movl %esp, %ebp
> andl $-8, %esp
> subl $32, %esp
> fldl 24(%ebp)
> fldl 16(%ebp)
> fldl 8(%ebp)
> fdiv %st(0), %st(1)
> fsubp %st(2)
> calll __ftol2
> movl %eax, 12(%esp)          # 4-byte Spill
> movl %edx, 8(%esp)           # 4-byte Spill
> calll __ftol2
> movl 12(%esp), %ecx          # 4-byte Reload
> subl %eax, %ecx
> movl 8(%esp), %eax           # 4-byte Reload
> sbbl %edx, %eax
> movl %eax, 4(%esp)           # 4-byte Spill
> movl %ecx, %eax
> movl 4(%esp), %edx           # 4-byte Reload
> movl %ebp, %esp
> popl %ebp
> ret
>
>
> --
>
> Thanks,
>
> Justin Holewinski
> <0001-Propagate-DAG-node-ordering-during-type-legalization.patch>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
>

-- 

Thanks,

Justin Holewinski
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130315/70df9bd9/attachment.html>