[PATCH] D13417: [MachineCombiner] make slack optional in critical path cost calculation (PR25016)

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 23 09:04:19 PDT 2015


spatel added a comment.

And here's the MTM debug output showing why reassociation is happening: in the original sequence, the depth of the last fadd is '6' as expected from 2 dependent ops, but the height of this sequence increases the critical path to '10'. Machine combiner currently uses that info to trigger a reassociation, but this increases the depth of the fadds to '9'.

  Depths for BB#0:
      0 Instructions
  0	%vreg4<def> = COPY %RDI; GR64:%vreg4
  0	%vreg3<def> = COPY %XMM3; FR64:%vreg3
  0	%vreg2<def> = COPY %XMM2; FR64:%vreg2
  0	%vreg1<def> = COPY %XMM1; FR64:%vreg1
  0	%vreg0<def> = COPY %XMM0; FR64:%vreg0
  0	%vreg5<def> = MOV32rm %vreg4, 1, %noreg, 0, %noreg; mem:LD4[%b01] GR32:%vreg5 GR64:%vreg4
  0	%vreg6<def> = MOV32rm %vreg4, 1, %noreg, 24, %noreg; mem:LD4[%b3] GR32:%vreg6 GR64:%vreg4
  0	%vreg7<def,tied1> = ADD32rm %vreg5<tied0>, %vreg4, 1, %noreg, 8, %noreg, %EFLAGS<imp-def,dead>; mem:LD4[%b1] GR32:%vreg7,%vreg5 GR64:%vreg4
  0	%vreg8<def,tied1> = ADD32rm %vreg6<tied0>, %vreg4, 1, %noreg, 16, %noreg, %EFLAGS<imp-def,dead>; mem:LD4[%b2] GR32:%vreg8,%vreg6 GR64:%vreg4
  5	%vreg9<def,tied1> = ADD32rr %vreg7<tied0>, %vreg8<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg9,%vreg7,%vreg8
  6	%vreg10<def,tied1> = ADD32rr %vreg9<tied0>, %vreg6, %EFLAGS<imp-def,dead>; GR32:%vreg10,%vreg9,%vreg6
  7	%vreg11<def,tied1> = ADD32rr %vreg9<tied0>, %vreg10<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg11,%vreg9,%vreg10
  8	%vreg12<def,tied1> = ADD32rr %vreg11<tied0>, %vreg11, %EFLAGS<imp-def,dead>; GR32:%vreg12,%vreg11,%vreg11
  9	%vreg13<def,tied1> = ADD32rr %vreg11<tied0>, %vreg12<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg13,%vreg11,%vreg12
  10	MOV32mr %vreg4, 1, %noreg, 24, %noreg, %vreg13<kill>; mem:ST4[%b3] GR64:%vreg4 GR32:%vreg13
  0	%vreg14<def> = VADDSDrr %vreg0, %vreg1; FR64:%vreg14,%vreg0,%vreg1
  0	%vreg15<def> = VADDSDrr %vreg2, %vreg3; FR64:%vreg15,%vreg2,%vreg3
  3	%vreg16<def> = VADDSDrr %vreg14<kill>, %vreg15<kill>; FR64:%vreg16,%vreg14,%vreg15
  6	%XMM0<def> = COPY %vreg16; FR64:%vreg16
  6	RETQ %XMM0
  Heights for BB#0:
     14 Instructions
     3c @ SBPort1 (3 ops x12)
     1c @ SBPort4 (1 ops x12)
     1c @ SBPort5 (1 ops x12)
     1c @ SBPort05 (1 ops x6)
     2c @ SBPort15 (4 ops x6)
     3c @ SBPort23 (6 ops x6)
     4c @ SBPort015 (11 ops x4)
     3c @ SBPortAny (18 ops x2)
  6	0	RETQ %XMM0
  6	0	%XMM0<def> = COPY %vreg16; FR64:%vreg16
  6	3	%vreg16<def> = VADDSDrr %vreg14<kill>, %vreg15<kill>; FR64:%vreg16,%vreg14,%vreg15
  6	6	%vreg15<def> = VADDSDrr %vreg2, %vreg3; FR64:%vreg15,%vreg2,%vreg3
  6	6	%vreg14<def> = VADDSDrr %vreg0, %vreg1; FR64:%vreg14,%vreg0,%vreg1
  10	0	MOV32mr %vreg4, 1, %noreg, 24, %noreg, %vreg13<kill>; mem:ST4[%b3] GR64:%vreg4 GR32:%vreg13
  10	1	%vreg13<def,tied1> = ADD32rr %vreg11<tied0>, %vreg12<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg13,%vreg11,%vreg12
  10	2	%vreg12<def,tied1> = ADD32rr %vreg11<tied0>, %vreg11, %EFLAGS<imp-def,dead>; GR32:%vreg12,%vreg11,%vreg11
  10	3	%vreg11<def,tied1> = ADD32rr %vreg9<tied0>, %vreg10<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg11,%vreg9,%vreg10
  10	4	%vreg10<def,tied1> = ADD32rr %vreg9<tied0>, %vreg6, %EFLAGS<imp-def,dead>; GR32:%vreg10,%vreg9,%vreg6
  10	5	%vreg9<def,tied1> = ADD32rr %vreg7<tied0>, %vreg8<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg9,%vreg7,%vreg8
  10	10	%vreg8<def,tied1> = ADD32rm %vreg6<tied0>, %vreg4, 1, %noreg, 16, %noreg, %EFLAGS<imp-def,dead>; mem:LD4[%b2] GR32:%vreg8,%vreg6 GR64:%vreg4
  10	10	%vreg7<def,tied1> = ADD32rm %vreg5<tied0>, %vreg4, 1, %noreg, 8, %noreg, %EFLAGS<imp-def,dead>; mem:LD4[%b1] GR32:%vreg7,%vreg5 GR64:%vreg4
  10	10	%vreg6<def> = MOV32rm %vreg4, 1, %noreg, 24, %noreg; mem:LD4[%b3] GR32:%vreg6 GR64:%vreg4
  10	10	%vreg5<def> = MOV32rm %vreg4, 1, %noreg, 0, %noreg; mem:LD4[%b01] GR32:%vreg5 GR64:%vreg4
  10	6	%vreg0<def> = COPY %XMM0; FR64:%vreg0
  10	6	%vreg1<def> = COPY %XMM1; FR64:%vreg1
  10	6	%vreg2<def> = COPY %XMM2; FR64:%vreg2
  10	6	%vreg3<def> = COPY %XMM3; FR64:%vreg3
  10	10	%vreg4<def> = COPY %RDI; GR64:%vreg4
  BB#0 Live-ins: XMM0 at 6 XMM1 at 6 XMM2 at 6 XMM3 at 6 DIL at 10
  Critical path: 10
  NEW INSTR   %vreg17<def> = UNKNOWN %vreg1, %vreg15<kill>
  
  NEW INSTR   %vreg16<def> = UNKNOWN %vreg0, %vreg17<kill>
  
  DEPENDENCE DATA FOR 0x10884d880
   NewRootDepth: 6 NewRootLatency: 3
   RootDepth: 3 RootLatency: 3 RootSlack: 4
   NewRootDepth + NewRootLatency = 9
   RootDepth + RootLatency + RootSlack = 10

Depths after reassociation:

  Depths for BB#0:
      0 Instructions
  0	%vreg4<def> = COPY %RDI; GR64:%vreg4
  0	%vreg3<def> = COPY %XMM3; FR64:%vreg3
  0	%vreg2<def> = COPY %XMM2; FR64:%vreg2
  0	%vreg1<def> = COPY %XMM1; FR64:%vreg1
  0	%vreg0<def> = COPY %XMM0; FR64:%vreg0
  0	%vreg5<def> = MOV32rm %vreg4, 1, %noreg, 0, %noreg; mem:LD4[%b01] GR32:%vreg5 GR64:%vreg4
  0	%vreg6<def> = MOV32rm %vreg4, 1, %noreg, 24, %noreg; mem:LD4[%b3] GR32:%vreg6 GR64:%vreg4
  0	%vreg7<def,tied1> = ADD32rm %vreg5<tied0>, %vreg4, 1, %noreg, 8, %noreg, %EFLAGS<imp-def,dead>; mem:LD4[%b1] GR32:%vreg7,%vreg5 GR64:%vreg4
  0	%vreg8<def,tied1> = ADD32rm %vreg6<tied0>, %vreg4, 1, %noreg, 16, %noreg, %EFLAGS<imp-def,dead>; mem:LD4[%b2] GR32:%vreg8,%vreg6 GR64:%vreg4
  5	%vreg9<def,tied1> = ADD32rr %vreg7<tied0>, %vreg8<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg9,%vreg7,%vreg8
  6	%vreg10<def,tied1> = ADD32rr %vreg9<tied0>, %vreg6, %EFLAGS<imp-def,dead>; GR32:%vreg10,%vreg9,%vreg6
  7	%vreg11<def,tied1> = ADD32rr %vreg9<tied0>, %vreg10<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg11,%vreg9,%vreg10
  8	%vreg12<def,tied1> = ADD32rr %vreg11<tied0>, %vreg11, %EFLAGS<imp-def,dead>; GR32:%vreg12,%vreg11,%vreg11
  9	%vreg13<def,tied1> = ADD32rr %vreg11<tied0>, %vreg12<kill>, %EFLAGS<imp-def,dead>; GR32:%vreg13,%vreg11,%vreg12
  10	MOV32mr %vreg4, 1, %noreg, 24, %noreg, %vreg13<kill>; mem:ST4[%b3] GR64:%vreg4 GR32:%vreg13
  0	%vreg15<def> = VADDSDrr %vreg2, %vreg3; FR64:%vreg15,%vreg2,%vreg3
  3	%vreg17<def> = VADDSDrr %vreg1, %vreg15<kill>; FR64:%vreg17,%vreg1,%vreg15
  6	%vreg16<def> = VADDSDrr %vreg0, %vreg17<kill>; FR64:%vreg16,%vreg0,%vreg17
  9	%XMM0<def> = COPY %vreg16; FR64:%vreg16
  9	RETQ %XMM0


http://reviews.llvm.org/D13417





More information about the llvm-commits mailing list