[llvm] r319175 - [X86][X87] Tag FABS/FCHS/FSQRT/FSIN/FCOS x87 instruction scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 07:03:42 PST 2017
Author: rksimon
Date: Tue Nov 28 07:03:42 2017
New Revision: 319175
URL: http://llvm.org/viewvc/llvm-project?rev=319175&view=rev
Log:
[X86][X87] Tag FABS/FCHS/FSQRT/FSIN/FCOS x87 instruction scheduler classes
Atom's FABS/FCHS/FSQRT latencies are taken from Agner Fog's instruction tables.
Note: I just added FSIN and FCOS to the existing IIC_FSINCOS itinerary, which actually models the more costly FSINCOS instruction.
Modified:
llvm/trunk/lib/Target/X86/X86InstrFPStack.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/test/CodeGen/X86/x87-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=319175&r1=319174&r2=319175&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Tue Nov 28 07:03:42 2017
@@ -118,10 +118,12 @@ let usesCustomInserter = 1 in { // Expa
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
-class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
- FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
-class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
- FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern,
+ InstrItinClass itin = NoItinerary> :
+ FpI_<outs, ins, fp, pattern, itin>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern,
+ InstrItinClass itin = NoItinerary> :
+ FpI_<outs, ins, fp, pattern, itin>, Requires<[FPStackf64]>;
// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
@@ -293,30 +295,38 @@ def COM_FST0r : FPST0rInst <MRM2r, "fc
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
// Unary operations.
-multiclass FPUnary<SDNode OpNode, Format fp, string asmstring> {
+multiclass FPUnary<SDNode OpNode, Format fp, string asmstring,
+ InstrItinClass itin> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
- [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+ [(set RFP32:$dst, (OpNode RFP32:$src))], itin>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
- [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+ [(set RFP64:$dst, (OpNode RFP64:$src))], itin>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
- [(set RFP80:$dst, (OpNode RFP80:$src))]>;
-def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
+ [(set RFP80:$dst, (OpNode RFP80:$src))], itin>;
+def _F : FPI<0xD9, fp, (outs), (ins), asmstring, itin>;
}
let Defs = [FPSW] in {
-defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
-defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
-let SchedRW = [WriteFSqrt] in {
-defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
+
+let SchedRW = [WriteVecLogic] in {
+defm CHS : FPUnary<fneg, MRM_E0, "fchs", IIC_FSIGN>;
+defm ABS : FPUnary<fabs, MRM_E1, "fabs", IIC_FSIGN>;
+}
+
+let SchedRW = [WriteFSqrt] in
+defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt", IIC_FSQRT>;
+
+let SchedRW = [WriteMicrocoded] in {
+defm SIN : FPUnary<fsin, MRM_FE, "fsin", IIC_FSINCOS>;
+defm COS : FPUnary<fcos, MRM_FF, "fcos", IIC_FSINCOS>;
}
-defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
-defm COS : FPUnary<fcos, MRM_FF, "fcos">;
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
-}
+} // hasSideEffects
+
def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // Defs = [FPSW]
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=319175&r1=319174&r2=319175&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue Nov 28 07:03:42 2017
@@ -477,6 +477,8 @@ def IIC_FXTRACT : InstrItinClass;
def IIC_FPREM1 : InstrItinClass;
def IIC_FPSTP : InstrItinClass;
def IIC_FPREM : InstrItinClass;
+def IIC_FSIGN : InstrItinClass;
+def IIC_FSQRT : InstrItinClass;
def IIC_FYL2XP1 : InstrItinClass;
def IIC_FSINCOS : InstrItinClass;
def IIC_FRNDINT : InstrItinClass;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=319175&r1=319174&r2=319175&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue Nov 28 07:03:42 2017
@@ -394,6 +394,8 @@ def AtomItineraries : ProcessorItinerari
InstrItinData<IIC_FXSAVE, [InstrStage<140, [Port0, Port1]>] >,
InstrItinData<IIC_FXRSTOR, [InstrStage<141, [Port0, Port1]>] >,
InstrItinData<IIC_FXCH, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_FSIGN, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_FSQRT, [InstrStage<71, [Port0, Port1]>] >,
// System instructions
InstrItinData<IIC_CPUID, [InstrStage<121, [Port0, Port1]>] >,
Modified: llvm/trunk/test/CodeGen/X86/x87-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87-schedule.ll?rev=319175&r1=319174&r2=319175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll Tue Nov 28 07:03:42 2017
@@ -96,21 +96,21 @@ define void @test_fabs() optsize {
; ATOM-LABEL: test_fabs:
; ATOM: # BB#0:
; ATOM-NEXT: #APP
-; ATOM-NEXT: fabs
+; ATOM-NEXT: fabs # sched: [1:1.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
; SLM-LABEL: test_fabs:
; SLM: # BB#0:
; SLM-NEXT: #APP
-; SLM-NEXT: fabs
+; SLM-NEXT: fabs # sched: [1:0.50]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-LABEL: test_fabs:
; SANDY: # BB#0:
; SANDY-NEXT: #APP
-; SANDY-NEXT: fabs
+; SANDY-NEXT: fabs # sched: [1:1.00]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retl # sched: [5:1.00]
;
@@ -124,28 +124,28 @@ define void @test_fabs() optsize {
; BROADWELL-LABEL: test_fabs:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fabs
+; BROADWELL-NEXT: fabs # sched: [1:0.33]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retl # sched: [6:0.50]
;
; SKYLAKE-LABEL: test_fabs:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fabs
+; SKYLAKE-NEXT: fabs # sched: [1:0.33]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fabs:
; SKX: # BB#0:
; SKX-NEXT: #APP
-; SKX-NEXT: fabs
+; SKX-NEXT: fabs # sched: [1:0.33]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BTVER2-LABEL: test_fabs:
; BTVER2: # BB#0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fabs
+; BTVER2-NEXT: fabs # sched: [1:0.50]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retl # sched: [4:1.00]
;
@@ -421,21 +421,21 @@ define void @test_fchs() optsize {
; ATOM-LABEL: test_fchs:
; ATOM: # BB#0:
; ATOM-NEXT: #APP
-; ATOM-NEXT: fchs
+; ATOM-NEXT: fchs # sched: [1:1.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
; SLM-LABEL: test_fchs:
; SLM: # BB#0:
; SLM-NEXT: #APP
-; SLM-NEXT: fchs
+; SLM-NEXT: fchs # sched: [1:0.50]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-LABEL: test_fchs:
; SANDY: # BB#0:
; SANDY-NEXT: #APP
-; SANDY-NEXT: fchs
+; SANDY-NEXT: fchs # sched: [1:1.00]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retl # sched: [5:1.00]
;
@@ -449,28 +449,28 @@ define void @test_fchs() optsize {
; BROADWELL-LABEL: test_fchs:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fchs
+; BROADWELL-NEXT: fchs # sched: [1:0.33]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retl # sched: [6:0.50]
;
; SKYLAKE-LABEL: test_fchs:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fchs
+; SKYLAKE-NEXT: fchs # sched: [1:0.33]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fchs:
; SKX: # BB#0:
; SKX-NEXT: #APP
-; SKX-NEXT: fchs
+; SKX-NEXT: fchs # sched: [1:0.33]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BTVER2-LABEL: test_fchs:
; BTVER2: # BB#0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fchs
+; BTVER2-NEXT: fchs # sched: [1:0.50]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retl # sched: [4:1.00]
;
@@ -1075,63 +1075,63 @@ define void @test_fcos() optsize {
; ATOM-LABEL: test_fcos:
; ATOM: # BB#0:
; ATOM-NEXT: #APP
-; ATOM-NEXT: fcos
+; ATOM-NEXT: fcos # sched: [174:87.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
; SLM-LABEL: test_fcos:
; SLM: # BB#0:
; SLM-NEXT: #APP
-; SLM-NEXT: fcos
+; SLM-NEXT: fcos # sched: [100:1.00]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-LABEL: test_fcos:
; SANDY: # BB#0:
; SANDY-NEXT: #APP
-; SANDY-NEXT: fcos
+; SANDY-NEXT: fcos # sched: [100:0.33]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retl # sched: [5:1.00]
;
; HASWELL-LABEL: test_fcos:
; HASWELL: # BB#0:
; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fcos
+; HASWELL-NEXT: fcos # sched: [100:0.25]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retl # sched: [5:0.50]
;
; BROADWELL-LABEL: test_fcos:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fcos
+; BROADWELL-NEXT: fcos # sched: [100:0.25]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retl # sched: [6:0.50]
;
; SKYLAKE-LABEL: test_fcos:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fcos
+; SKYLAKE-NEXT: fcos # sched: [100:0.25]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fcos:
; SKX: # BB#0:
; SKX-NEXT: #APP
-; SKX-NEXT: fcos
+; SKX-NEXT: fcos # sched: [100:0.25]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BTVER2-LABEL: test_fcos:
; BTVER2: # BB#0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fcos
+; BTVER2-NEXT: fcos # sched: [100:0.17]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retl # sched: [4:1.00]
;
; ZNVER1-LABEL: test_fcos:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fcos
+; ZNVER1-NEXT: fcos # sched: [100:?]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retl # sched: [1:0.50]
tail call void asm sideeffect "fcos", ""() nounwind
@@ -3048,63 +3048,63 @@ define void @test_fsin() optsize {
; ATOM-LABEL: test_fsin:
; ATOM: # BB#0:
; ATOM-NEXT: #APP
-; ATOM-NEXT: fsin
+; ATOM-NEXT: fsin # sched: [174:87.00]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
; SLM-LABEL: test_fsin:
; SLM: # BB#0:
; SLM-NEXT: #APP
-; SLM-NEXT: fsin
+; SLM-NEXT: fsin # sched: [100:1.00]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-LABEL: test_fsin:
; SANDY: # BB#0:
; SANDY-NEXT: #APP
-; SANDY-NEXT: fsin
+; SANDY-NEXT: fsin # sched: [100:0.33]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retl # sched: [5:1.00]
;
; HASWELL-LABEL: test_fsin:
; HASWELL: # BB#0:
; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsin
+; HASWELL-NEXT: fsin # sched: [100:0.25]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retl # sched: [5:0.50]
;
; BROADWELL-LABEL: test_fsin:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsin
+; BROADWELL-NEXT: fsin # sched: [100:0.25]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retl # sched: [6:0.50]
;
; SKYLAKE-LABEL: test_fsin:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsin
+; SKYLAKE-NEXT: fsin # sched: [100:0.25]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fsin:
; SKX: # BB#0:
; SKX-NEXT: #APP
-; SKX-NEXT: fsin
+; SKX-NEXT: fsin # sched: [100:0.25]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BTVER2-LABEL: test_fsin:
; BTVER2: # BB#0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsin
+; BTVER2-NEXT: fsin # sched: [100:0.17]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retl # sched: [4:1.00]
;
; ZNVER1-LABEL: test_fsin:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsin
+; ZNVER1-NEXT: fsin # sched: [100:?]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retl # sched: [1:0.50]
tail call void asm sideeffect "fsin", ""() nounwind
@@ -3196,7 +3196,7 @@ define void @test_fsqrt() optsize {
; ATOM-LABEL: test_fsqrt:
; ATOM: # BB#0:
; ATOM-NEXT: #APP
-; ATOM-NEXT: fsqrt
+; ATOM-NEXT: fsqrt # sched: [71:35.50]
; ATOM-NEXT: #NO_APP
; ATOM-NEXT: retl # sched: [79:39.50]
;
More information about the llvm-commits mailing list