[llvm] r329837 - [X86][Atom] Convert Atom scheduler model to SchedRW (PR32431)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 11 11:23:01 PDT 2018


Author: rksimon
Date: Wed Apr 11 11:23:01 2018
New Revision: 329837

URL: http://llvm.org/viewvc/llvm-project?rev=329837&view=rev
Log:
[X86][Atom] Convert Atom scheduler model to SchedRW (PR32431)

Atom is the only x86 target that still uses schedule itineraries; if we can remove them, then we can begin the work of removing x86 itineraries altogether. I've also found that it will help with PR36550.

I've focussed on matching the existing model as closely as possible (relying on the schedule tests). PR36895 indicated that a lot of the existing entries were incorrect, but we can just as easily fix these after this patch as before. Hopefully we can get llvm-exegesis to help here.

There are a few instructions (mainly push/pop/return) that rely on itinerary scheduling of multiple resource stages, but I don't think any of these are showstoppers.

There are also a few codegen changes that seem related to the post-RA scheduler acting a little differently; I haven't tracked these down, but they don't seem critical.

NOTE: I don't have access to any Atom hardware, so this hasn't been tested in the wild.

Differential Revision: https://reviews.llvm.org/D45486

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
    llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
    llvm/trunk/test/CodeGen/X86/lsr-static-addr.ll
    llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
    llvm/trunk/test/CodeGen/X86/select.ll
    llvm/trunk/test/CodeGen/X86/sse-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
    llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
    llvm/trunk/test/CodeGen/X86/x87-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Wed Apr 11 11:23:01 2018
@@ -15,549 +15,801 @@
 //
 // Scheduling information derived from the "Intel 64 and IA32 Architectures
 // Optimization Reference Manual", Chapter 13, Section 4.
-// Functional Units
-//    Port 0
-def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store
-                      // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide
-def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA
-                      // SIMD/FP: SIMD ALU, FP Adder
-
-def AtomItineraries : ProcessorItineraries<
-  [ Port0, Port1 ],
-  [], [
-  // P0 only
-  // InstrItinData<class, [InstrStage<N, [P0]>] >,
-  // P0 or P1
-  // InstrItinData<class, [InstrStage<N, [P0, P1]>] >,
-  // P0 and P1
-  // InstrItinData<class, [InstrStage<N, [P0], 0>,  InstrStage<N, [P1]>] >,
-  //
-  // Default is 1 cycle, port0 or port1
-  InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >,
-  // mul
-  InstrItinData<IIC_MUL8_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL8_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL64_MEM, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL64_REG, [InstrStage<12, [Port0, Port1]>] >,
-  // imul by al, ax, eax, rax
-  InstrItinData<IIC_IMUL8_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL8_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL64_MEM, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL64_REG, [InstrStage<12, [Port0, Port1]>] >,
-  // imul reg by reg|mem
-  InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >,
-  // imul reg = reg/mem * imm
-  InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >,
-  // idiv
-  InstrItinData<IIC_IDIV8_MEM, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV8_REG, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV16_MEM, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV16_REG, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV32_MEM, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV32_REG, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV64_MEM, [InstrStage<130, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV64_REG, [InstrStage<130, [Port0, Port1]>] >,
-  // div
-  InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV16_MEM, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV16_REG, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV32_MEM, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV32_REG, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV64_MEM, [InstrStage<130, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV64_REG, [InstrStage<130, [Port0, Port1]>] >,
-  // neg/not/inc/dec
-  InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
-  // add/sub/and/or/xor/cmp/test
-  InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
-  // adc/sbc
-  InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<1, [Port0]>] >,
-  // shift/rotate
-  InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
-  // shift double
-  InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >,
-  // cmov
-  InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >,
-  // set
-  InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >,
-  // jcc
-  InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >,
-  // jcxz/jecxz/jrcxz
-  InstrItinData<IIC_JCXZ, [InstrStage<4, [Port0, Port1]>] >,
-  // jmp rel
-  InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >,
-  // jmp indirect
-  InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >,
-  // jmp far
-  InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >,
-  InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >,
-  // loop/loope/loopne
-  InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >,
-  // call - all but reg/imm
-  InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>,
-                              InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >,
-  InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >,
-  InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >,
-  //ret
-  InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >,
-  InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>,  InstrStage<1, [Port1]>] >,
-  //sign extension movs
-  InstrItinData<IIC_MOVSX,[InstrStage<1, [Port0] >] >,
-  InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
-  //zero extension movs
-  InstrItinData<IIC_MOVZX,[InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
-  InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
-
-  // SSE binary operations
-  // arithmetic fp scalar
-  InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0], 0>,
-                                   InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<5, [Port0], 0>,
-                                   InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<34, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<34, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<62, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<10, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<9, [Port0, Port1]>] >,
-
-  // arithmetic fp parallel
-  InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<5, [Port0], 0>,
-                                   InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<125, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<125, [Port0, Port1]>] >,
-
-  // bitwise parallel
-  InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [Port0]>] >,
-
-  // arithmetic int parallel
-  InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<3, [Port0, Port1]>] >,
-
-  // multiply int parallel
-  InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [Port0]>] >,
-
-  // shift parallel
-  InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PACK, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<34, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<34, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<125, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<125, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<62, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_RSQRTPS_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RSQRTPS_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RSQRTSS_RR, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_RSQRTSS_RM, [InstrStage<4, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOVMSK, [InstrStage<3, [Port0]>] >,
-  InstrItinData<IIC_SSE_MASKMOV, [InstrStage<2, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<2, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_LDDQU, [InstrStage<3, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<3, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PAUSE, [InstrStage<17, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_STMXCSR, [InstrStage<15, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
-
-  // conversions
-  // to/from PD ...
-  InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
-  // to/from PS except to/from PD and PS2PI
-  InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
-
-  // MMX MOVs
-  InstrItinData<IIC_MMX_MOV_MM_RM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<3, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
-  // other MMX
-  InstrItinData<IIC_MMX_ALU_RM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_ALU_RR,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PMUL, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PSADBW,   [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_PCK_RM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_PCK_RR,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PSHUF,   [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_PEXTR,   [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PINSRW,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOVMSK, [InstrStage<3, [Port0]>] >,
-  // conversions
-  // from/to PD
-  InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
-  // from/to PI
-  InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<5, [Port0], 0>,
-                                    InstrStage<5, [Port1]>]>,
-
-  InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_FILD, [InstrStage<5, [Port0], 0>, InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_FLD,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_FLD80, [InstrStage<4, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_FST,   [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_FST80, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_FIST,  [InstrStage<6, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_FCMOV,  [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_FLDZ,   [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FUCOM,  [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FUCOMI, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_FCOMI,  [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNSTSW, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNSTCW, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_FLDCW,  [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNINIT, [InstrStage<63, [Port0, Port1]>] >,
-  InstrItinData<IIC_FFREE,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNCLEX, [InstrStage<25, [Port0, Port1]>] >,
-  InstrItinData<IIC_WAIT,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXAM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_FNOP,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FLDL,  [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_F2XM1,  [InstrStage<99, [Port0, Port1]>] >,
-  InstrItinData<IIC_FYL2X,  [InstrStage<146, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPTAN,  [InstrStage<168, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPATAN,  [InstrStage<183, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXTRACT,  [InstrStage<25, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPREM1,  [InstrStage<71, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPSTP,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPREM,  [InstrStage<55, [Port0, Port1]>] >,
-  InstrItinData<IIC_FYL2XP1,  [InstrStage<147, [Port0, Port1]>] >,
-  InstrItinData<IIC_FSINCOS,  [InstrStage<174, [Port0, Port1]>] >,
-  InstrItinData<IIC_FRNDINT,  [InstrStage<46, [Port0, Port1]>] >,
-  InstrItinData<IIC_FSCALE,  [InstrStage<77, [Port0, Port1]>] >,
-  InstrItinData<IIC_FCOMPP,  [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FXSAVE,  [InstrStage<140, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXRSTOR,  [InstrStage<141, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXCH, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FSIGN,  [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FSQRT,  [InstrStage<71, [Port0, Port1]>] >,
-
-  // System instructions
-  InstrItinData<IIC_CPUID, [InstrStage<121, [Port0, Port1]>] >,
-  InstrItinData<IIC_INT,   [InstrStage<127, [Port0, Port1]>] >,
-  InstrItinData<IIC_INT3,  [InstrStage<130, [Port0, Port1]>] >,
-  InstrItinData<IIC_INVD,  [InstrStage<1003, [Port0, Port1]>] >,
-  InstrItinData<IIC_INVLPG, [InstrStage<71, [Port0, Port1]>] >,
-  InstrItinData<IIC_IRET,  [InstrStage<109, [Port0, Port1]>] >,
-  InstrItinData<IIC_HLT,   [InstrStage<121, [Port0, Port1]>] >,
-  InstrItinData<IIC_LXS,   [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_LTR,   [InstrStage<83, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDTSC, [InstrStage<30, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDTSCP, [InstrStage<30, [Port0, Port1]>] >,
-  InstrItinData<IIC_RSM,   [InstrStage<741, [Port0, Port1]>] >,
-  InstrItinData<IIC_SIDT,  [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SGDT,  [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SLDT,  [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_STR,    [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SWAPGS, [InstrStage<22, [Port0, Port1]>] >,
-  InstrItinData<IIC_SYSCALL, [InstrStage<96, [Port0, Port1]>] >,
-  InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<88, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_IN_RR,  [InstrStage<94, [Port0, Port1]>] >,
-  InstrItinData<IIC_IN_RI,  [InstrStage<92, [Port0, Port1]>] >,
-  InstrItinData<IIC_OUT_RR, [InstrStage<68, [Port0, Port1]>] >,
-  InstrItinData<IIC_OUT_IR, [InstrStage<72, [Port0, Port1]>] >,
-  InstrItinData<IIC_INS,    [InstrStage<59, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_MOV_REG_DR, [InstrStage<88, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_DR_REG, [InstrStage<123, [Port0, Port1]>] >,
-  // worst case for mov REG_CRx
-  InstrItinData<IIC_MOV_REG_CR, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_CR_REG, [InstrStage<136, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MOV_MEM_SR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_SR_REG, [InstrStage<21, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_SR_MEM, [InstrStage<26, [Port0, Port1]>] >,
-  // LAR
-  InstrItinData<IIC_LAR_RM,  [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_LAR_RR,  [InstrStage<54, [Port0, Port1]>] >,
-  // LSL
-  InstrItinData<IIC_LSL_RM,  [InstrStage<46, [Port0, Port1]>] >,
-  InstrItinData<IIC_LSL_RR,  [InstrStage<49, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_LGDT, [InstrStage<44, [Port0, Port1]>] >,
-  InstrItinData<IIC_LIDT, [InstrStage<44, [Port0, Port1]>] >,
-  InstrItinData<IIC_LLDT_REG, [InstrStage<60, [Port0, Port1]>] >,
-  InstrItinData<IIC_LLDT_MEM, [InstrStage<64, [Port0, Port1]>] >,
-  // push control register, segment registers
-  InstrItinData<IIC_PUSH_CS, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_PUSH_SR, [InstrStage<2, [Port0, Port1]>] >,
-  // pop control register, segment registers
-  InstrItinData<IIC_POP_SR,    [InstrStage<29, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_SR_SS, [InstrStage<48, [Port0, Port1]>] >,
-  // VERR, VERW
-  InstrItinData<IIC_VERR,     [InstrStage<41, [Port0, Port1]>] >,
-  InstrItinData<IIC_VERW_REG, [InstrStage<51, [Port0, Port1]>] >,
-  InstrItinData<IIC_VERW_MEM, [InstrStage<50, [Port0, Port1]>] >,
-  // WRMSR, RDMSR
-  InstrItinData<IIC_WRMSR, [InstrStage<202, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDMSR, [InstrStage<78, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDPMC, [InstrStage<46, [Port0, Port1]>] >,
-  // SMSW, LMSW
-  InstrItinData<IIC_SMSW, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_LMSW_REG, [InstrStage<69, [Port0, Port1]>] >,
-  InstrItinData<IIC_LMSW_MEM, [InstrStage<67, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_ENTER, [InstrStage<32, [Port0, Port1]>] >,
-  InstrItinData<IIC_LEAVE, [InstrStage<2, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_POP_MEM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_REG16, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_REG, [InstrStage<1, [Port0], 0>,
-                            InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_POP_F, [InstrStage<32, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_FD, [InstrStage<26, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_A, [InstrStage<9, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [Port0], 0>,
-                               InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_PUSH_MEM, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_PUSH_REG, [InstrStage<1, [Port0], 0>,
-                               InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_PUSH_F, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<16, [Port0, Port1]>] >,
-  InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<16, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPS, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_MEM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_AHF, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BT_MI, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BT_MR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_BT_RI, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_BT_RR, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_BTX_MI, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_BTX_MR, [InstrStage<11, [Port0, Port1]>] >,
-  InstrItinData<IIC_BTX_RI, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_BTX_RR, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_XCHG_REG, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_XCHG_MEM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_XADD_REG, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_XADD_MEM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_REG, [InstrStage<15, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_8B, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >,
-  InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLC_CMC_STC, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >,
-  InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >,
-  InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >,
-  InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAA, [InstrStage<13, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAD, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAM, [InstrStage<21, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAS, [InstrStage<13, [Port0, Port1]>] >,
-  InstrItinData<IIC_DAA, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_DAS, [InstrStage<20, [Port0, Port1]>] >,
-  InstrItinData<IIC_BOUND, [InstrStage<11, [Port0, Port1]>] >,
-  InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
-  InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CBW, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_EMMS, [InstrStage<5, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
-  ]>;
 
 // Atom machine model.
 def AtomModel : SchedMachineModel {
   let IssueWidth = 2;  // Allows 2 instructions per scheduling group.
   let MicroOpBufferSize = 0; // In-order execution, always hide latency.
-  let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
-  let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+  let LoadLatency = 3; // Expected cycles, may be overridden.
+  let HighLatency = 30;// Expected, may be overridden.
 
   // On the Atom, the throughput for taken branches is 2 cycles. For small
   // simple loops, expand by a small factor to hide the backedge cost.
   let LoopMicroOpBufferSize = 10;
   let PostRAScheduler = 1;
   let CompleteModel = 0;
+}
+
+let SchedModel = AtomModel in {
+
+// Functional Units
+def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
+                                 // SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
+def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
+                                 // SIMD/FP: SIMD ALU, FP Adder
+
+def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
+
+// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 3>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
+                            list<ProcResourceKind> RRPorts,
+                            list<ProcResourceKind> RMPorts,
+                            int RRLat = 1, int RMLat = 1,
+                            list<int> RRRes = [1],
+                            list<int> RMRes = [1]> {
+  // Register variant uses RRPorts with latency RRLat and cycles RRRes.
+  def : WriteRes<SchedRW, RRPorts> {
+    let Latency = RRLat;
+    let ResourceCycles = RRRes;
+  }
+
+  // Memory (folded load) variant uses RMPorts with latency RMLat and cycles
+  // RMRes; no extra load port or load latency is added implicitly.
+  def : WriteRes<SchedRW.Folded, RMPorts> {
+    let Latency = RMLat;
+    let ResourceCycles = RMRes;
+  }
+}
+
+// A folded store needs a cycle on Port0 for the store data.
+def : WriteRes<WriteRMW, [AtomPort0]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteALU,   [AtomPort01], [AtomPort0]>;
+defm : AtomWriteResPair<WriteIMul,  [AtomPort01], [AtomPort01],  7,  7, [7], [7]>;
+defm : AtomWriteResPair<WriteIDiv,  [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteCRC32, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+defm : AtomWriteResPair<WriteCMOV,  [AtomPort01], [AtomPort0]>;
+
+def  : WriteRes<WriteSETCC, [AtomPort01]>;
+def  : WriteRes<WriteSETCCStore, [AtomPort01]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+
+def : WriteRes<WriteIMulH, [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+// This is for simple LEAs with one or two input operands.
+def : WriteRes<WriteLEA, [AtomPort1]>;
+
+def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
+  let Latency = 8;
+  let ResourceCycles = [8];
+}
+def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
+
+def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 6;
+  let ResourceCycles = [6];
+}
+def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
+
+def AtomWriteIMul64 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 12;
+  let ResourceCycles = [12];
+}
+def : InstRW<[AtomWriteIMul64], (instrs MUL64r, IMUL64r, IMUL64rr, IMUL64rm,
+                                        MUL64m, IMUL64m)>;
+
+def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
+  let Latency = 14;
+  let ResourceCycles = [14];
+}
+def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
+                                         IMUL64rmi8, IMUL64rmi32)>;
+
+def AtomWriteDiv : SchedWriteRes<[AtomPort01]> {
+  let Latency = 50;
+  let ResourceCycles = [50];
+}
+def : InstRW<[AtomWriteDiv], (instrs DIV8r,
+                                     DIV16r, DIV16m,
+                                     DIV32r, DIV32m)>;
+
+def AtomWriteDiv8Ld : SchedWriteRes<[AtomPort01]> {
+  let Latency = 68;
+  let ResourceCycles = [68];
+}
+def : InstRW<[AtomWriteDiv8Ld], (instrs DIV8m)>;
+
+def AtomWriteIDiv64 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 130;
+  let ResourceCycles = [130];
+}
+def : InstRW<[AtomWriteIDiv64], (instrs DIV64r, IDIV64r,
+                                        DIV64m, IDIV64m)>;
+
+// Bit counts.
+defm : AtomWriteResPair<WriteBitScan, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
+defm : AtomWriteResPair<WritePOPCNT,  [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteLZCNT,   [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteTZCNT,   [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+// BMI1 BEXTR, BMI2 BZHI
+defm : AtomWriteResPair<WriteBEXTR, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteBZHI,  [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad,  [AtomPort0]>;
+def : WriteRes<WriteStore, [AtomPort0]>;
+def : WriteRes<WriteMove,  [AtomPort01]>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero,  []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Special case scheduling classes.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteSystem,     [AtomPort01]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
+def : WriteRes<WriteFence,      [AtomPort0]>;
+
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+def : WriteRes<WriteNop, [AtomPort01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def  : WriteRes<WriteFLoad,  [AtomPort0]>;
+def  : WriteRes<WriteFStore, [AtomPort0]>;
+def  : WriteRes<WriteFMove,  [AtomPort01]>;
+
+defm : AtomWriteResPair<WriteFAdd,           [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
+defm : AtomWriteResPair<WriteFMul,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
+defm : AtomWriteResPair<WriteFRcp,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
+defm : AtomWriteResPair<WriteFRsqrt,         [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
+defm : AtomWriteResPair<WriteFDiv,          [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFSqrt,         [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFShuffle,       [AtomPort0],  [AtomPort0]>;
+defm : AtomWriteResPair<WriteFVarShuffle,    [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFMA,            [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFBlend,         [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarBlend,      [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFShuffle256,    [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Conversions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
+defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
+defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector integer operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def  : WriteRes<WriteVecLoad,  [AtomPort0]>;
+def  : WriteRes<WriteVecStore, [AtomPort0]>;
+def  : WriteRes<WriteVecMove,  [AtomPort01]>;
+
+defm : AtomWriteResPair<WriteVecALU,       [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecLogic,     [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecShift,     [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : AtomWriteResPair<WriteVecIMul,       [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WritePMULLD,       [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteMPSAD,        [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteShuffle,       [AtomPort0],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVarShuffle,   [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
+defm : AtomWriteResPair<WriteBlend,         [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarBlend,      [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteShuffle256,    [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarShuffle256, [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarVecShift,   [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE42 String instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WritePCmpIStrI, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpIStrM, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpEStrI, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpEStrM, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// MOVMSK Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def  : WriteRes<WriteFMOVMSK,   [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def  : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def  : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteAESIMC,    [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteAESKeyGen, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteAESDecEnc, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
+defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteCLMul, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Special Cases.
+////////////////////////////////////////////////////////////////////////////////
+
+// Port0
+def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite0_1], (instrs FXAM,
+                                     BSWAP32r, BSWAP64r,
+                                     DEC8m, DEC16m, DEC32m, DEC64m,
+                                     INC8m, INC16m, INC32m, INC64m,
+                                     MOVSX64rr32,
+                                     MMX_MOVD64rr, MMX_MOVD64mr,
+                                     MMX_MOVD64to64rr, MMX_MOVD64to64rm,
+                                     MMX_PSHUFBrr, MMX_PSHUFBrm,
+                                     MOVDI2PDIrr, MOVDI2PDIrm,
+                                     MOV64toPQIrr, MOV64toPQIrm,
+                                     MOV64toSDrr, MOV64toSDrm, MOVSDto64mr,
+                                     MOVDI2SSrr, MOVDI2SSrm,
+                                     MOVPDI2DImr, MOVPQIto64mr, MOVSS2DImr, MOVQI2PQIrm, MOVPQI2QImr)>;
+def : InstRW<[AtomWrite0_1], (instregex "(ADC|ADD|AND|NEG|NOT|OR|SBB|SUB|XOR)(8|16|32|64)m",
+                                        "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
+                                        "MOV(S|Z)X(32|64)(rr|rm)(8|8_NOREX|16)",
+                                        "LD_F(P)?(16|32|64)?(m|rr)",
+                                        "MMX_MASKMOVQ(64)?",
+                                        "MMX_PAVG(B|W)irm",
+                                        "MMX_P(MAX|MIN)(UB|SW)irm",
+                                        "MMX_PSIGN(B|D|W)rm")>;
+                                        
+def AtomWrite0_3 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 3;
+  let ResourceCycles = [3];
+}
+def : InstRW<[AtomWrite0_3], (instrs MMX_MOVD64from64rr, MMX_MOVD64grr,
+                                     MOVPDI2DIrr, MOVPQIto64rr,
+                                     MOVSDto64rr, MOVSS2DIrr)>;
+
+def AtomWrite0_4 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 4;
+  let ResourceCycles = [4];
+}
+def : InstRW<[AtomWrite0_4], (instrs MMX_PMADDUBSWrr, MMX_PMADDUBSWrm,
+                                     MMX_PMADDWDirr, MMX_PMADDWDirm,
+                                     MMX_PMULHRSWrr, MMX_PMULHRSWrm,
+                                     MMX_PMULHUWirr, MMX_PMULHUWirm,
+                                     MMX_PMULHWirr, MMX_PMULHWirm,
+                                     MMX_PMULLWirr, MMX_PMULLWirm,
+                                     MMX_PMULUDQirr, MMX_PMULUDQirm)>;
+
+def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 5;
+  let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)",
+                                        "MUL(PS|SD)(rr|rm)(_Int)?")>;
+
+// Port1
+def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
+def : InstRW<[AtomWrite1_1], (instregex "ABS_F", "CHS_F",
+                                        "UCOM_F(P|PP)?r",
+                                        "BT(C|R|S)?(16|32|64)(rr|ri8)")>;
+
+def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
+  let Latency = 5;
+  let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
+                                     MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
+
+// Port0 and Port1
+def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
+  let Latency = 1;
+  let ResourceCycles = [1, 1];
+}
+def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
+                                       POP16rmr, POP32rmr, POP64rmr,
+                                       PUSH16r, PUSH32r, PUSH64r,
+                                       PUSHi16, PUSHi32,
+                                       PUSH16rmr, PUSH32rmr, PUSH64rmr,
+                                       PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
+                                       XCH_F)>;
+def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
+                                          "IRET(16|32|64)?")>;
+
+def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
+  let Latency = 5;
+  let ResourceCycles = [5, 5];
+}
+def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
+def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
+
+// Port0 or Port1
+def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, LD_F0, WAIT,
+                                      LFENCE,
+                                      STOSB, STOSL, STOSQ, STOSW,
+                                      MOVSSrr, MOVSSrr_REV,
+                                      PSLLDQri, PSRLDQri)>;
+def : InstRW<[AtomWrite01_1], (instregex "(MMX_)?PS(LL|RA|RL)(D|Q|W)ri",
+                                         "MMX_PAVG(B|W)irr",
+                                         "MMX_P(MAX|MIN)(UB|SW)irr",
+                                         "MMX_PSIGN(B|D|W)rr",
+                                         "MMX_PACK(SSDW|SSWB|USWB)irr",
+                                         "MMX_PUNPCKH(BW|DQ|WD)irr")>;
+
+def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
+                                      PUSH16rmm, PUSH32rmm, PUSH64rmm,
+                                      LODSB, LODSL, LODSQ, LODSW,
+                                      SCASB, SCASL, SCASQ, SCASW,
+                                      SHLD32rrCL, SHRD32rrCL,
+                                      SHLD32rri8, SHRD32rri8)>;
+def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
+                                         "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
+                                         "XADD(8|16|32|64)rr",
+                                         "XCHG(8|16|32|64)(ar|rr)",
+                                         "(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
+                                         "MMX_P(ADD|SUB)Qirr",
+                                         "MOV(S|Z)X16rr8",
+                                         "MOV(UPS|UPD|DQU)mr",
+                                         "MASKMOVDQU(64)?",
+                                         "P(ADD|SUB)Qrr")>;
+
+def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 3;
+  let ResourceCycles = [3];
+}
+def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
+                                      CMPSB, CMPSL, CMPSQ, CMPSW,
+                                      MOVSB, MOVSL, MOVSQ, MOVSW,
+                                      POP16rmm, POP32rmm, POP64rmm)>;
+def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
+                                         "XCHG(8|16|32|64)rm",
+                                         "(MMX_)?PH(ADD|SUB)Drr",
+                                         "MOV(S|Z)X16rm8",
+                                         "MMX_P(ADD|SUB)Qirm",
+                                         "MOV(UPS|UPD|DQU)rm",
+                                         "P(ADD|SUB)Qrm")>;
+
+def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 4;
+  let ResourceCycles = [4];
+}
+def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
+                                      JCXZ, JECXZ, JRCXZ,
+                                      SHLD32mrCL, SHRD32mrCL,
+                                      SHLD32mri8, SHRD32mri8,
+                                      LD_F80m,
+                                      MMX_PSADBWirr, MMX_PSADBWirm)>;
+def : InstRW<[AtomWrite01_4], (instregex "(MMX_)?PH(ADD|SUB)Drm",
+                                         "(MMX_)?PEXTRWrr(_REV)?")>;
+
+def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 5;
+  let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, LDMXCSR,
+                                      MMX_EMMS)>;
+def : InstRW<[AtomWrite01_5], (instregex "ST_FP80m",
+                                         "MMX_PH(ADD|SUB)S?Wrr")>;
+
+def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 6;
+  let ResourceCycles = [6];
+}
+def : InstRW<[AtomWrite01_6], (instrs LD_F1, CMPXCHG8rm, INTO, XLAT,
+                                      SHLD16rrCL, SHRD16rrCL,
+                                      SHLD16rri8, SHRD16rri8,
+                                      SHLD16mrCL, SHRD16mrCL,
+                                      SHLD16mri8, SHRD16mri8,
+                                      ADDSUBPDrr, ADDSUBPDrm,
+                                      CVTPS2DQrr, CVTTPS2DQrr)>;
+def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
+                                         "IST_F(P)?(16|32|64)?m",
+                                         "MMX_PH(ADD|SUB)S?Wrm",
+                                         "(ADD|SUB|MAX|MIN)PDrr",
+                                         "CMPPDrri")>;
+
+def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 7;
+  let ResourceCycles = [7];
+}
+def : InstRW<[AtomWrite01_7], (instrs AAD8i8,
+                                      CVTDQ2PDrr,
+                                      CVTPD2DQrr,
+                                      CVTPD2PSrr,
+                                      CVTPS2DQrm,
+                                      CVTPS2PDrr,
+                                      CVTTPD2DQrr,
+                                      CVTTPS2DQrm,
+                                      MMX_CVTPD2PIirr,
+                                      MMX_CVTPI2PDirr,
+                                      MMX_CVTTPD2PIirr)>;
+def : InstRW<[AtomWrite01_7], (instregex "(ADD|SUB|MAX|MIN)PDrm",
+                                         "CMPPDrmi")>;
+
+def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 8;
+  let ResourceCycles = [8];
+}
+def : InstRW<[AtomWrite01_8], (instrs LOOPE,
+                                      PUSHA16, PUSHA32,
+                                      SHLD64rrCL, SHRD64rrCL,
+                                      FNSTCW16m,
+                                      CVTDQ2PDrm,
+                                      CVTPD2DQrm,
+                                      CVTPD2PSrm,
+                                      CVTPS2PDrm,
+                                      CVTTPD2DQrm,
+                                      MMX_CVTPD2PIirm,
+                                      MMX_CVTPI2PDirm,
+                                      MMX_CVTTPD2PIirm)>;
+
+def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 9;
+  let ResourceCycles = [9];
+}
+def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
+                                      POPA16, POPA32,
+                                      PUSHF16, PUSHF32, PUSHF64,
+                                      SHLD64mrCL, SHRD64mrCL,
+                                      SHLD64mri8, SHRD64mri8,
+                                      SHLD64rri8, SHRD64rri8,
+                                      CMPXCHG8rr,
+                                      MULPDrr, RCPPSr, RSQRTPSr)>;
+def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F",
+                                         "(U)?COM_FI", "TST_F",
+                                         "(U)?COMIS(D|S)rr",
+                                         "CVT(T)?SS2SI64rr(_Int)?")>;
+
+def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 10;
+  let ResourceCycles = [10];
+}
+def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI,
+                                       MULPDrm, RCPPSm, RSQRTPSm)>;
+def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
+                                          "CVT(T)?SS2SI64rm(_Int)?")>;
+
+def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 11;
+  let ResourceCycles = [11];
+}
+def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
+def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
+
+def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 13;
+  let ResourceCycles = [13];
+}
+def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
+
+def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 14;
+  let ResourceCycles = [14];
+}
+def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
+
+def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 15;
+  let ResourceCycles = [15];
+}
+def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr,
+                                       STMXCSR)>;
+
+def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 17;
+  let ResourceCycles = [17];
+}
+def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
+
+def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 18;
+  let ResourceCycles = [18];
+}
+def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
+
+def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 20;
+  let ResourceCycles = [20];
+}
+def : InstRW<[AtomWrite01_20], (instrs DAS)>;
+
+def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 21;
+  let ResourceCycles = [21];
+}
+def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
+
+def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 22;
+  let ResourceCycles = [22];
+}
+def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
+
+def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 23;
+  let ResourceCycles = [23];
+}
+def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
+
+def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 25;
+  let ResourceCycles = [25];
+}
+def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
+
+def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 26;
+  let ResourceCycles = [26];
+}
+def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
+
+def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 29;
+  let ResourceCycles = [29];
+}
+def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
+
+def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 30;
+  let ResourceCycles = [30];
+}
+def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
+
+def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 32;
+  let ResourceCycles = [32];
+}
+def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
+
+def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 45;
+  let ResourceCycles = [45];
+}
+def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
+
+def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 46;
+  let ResourceCycles = [46];
+}
+def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
+
+def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 48;
+  let ResourceCycles = [48];
+}
+def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
+
+def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 55;
+  let ResourceCycles = [55];
+}
+def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
+
+def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 59;
+  let ResourceCycles = [59];
+}
+def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
+
+def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 62;
+  let ResourceCycles = [62];
+}
+def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?",
+                                          "SQRTSD(r|m)(_Int)?")>;
+
+def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 63;
+  let ResourceCycles = [63];
+}
+def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
+
+def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 68;
+  let ResourceCycles = [68];
+}
+def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
+
+def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 70;
+  let ResourceCycles = [70];
+}
+def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm, SQRTPSr, SQRTPSm)>;
+
+def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 71;
+  let ResourceCycles = [71];
+}
+def : InstRW<[AtomWrite01_71], (instrs FPREM1,
+                                       INVLPG, INVLPGA32, INVLPGA64)>;
+def : InstRW<[AtomWrite01_71], (instregex "SQRT_F")>;
+
+def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 72;
+  let ResourceCycles = [72];
+}
+def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
+
+def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 74;
+  let ResourceCycles = [74];
+}
+def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
+
+def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 77;
+  let ResourceCycles = [77];
+}
+def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
 
-  let Itineraries = AtomItineraries;
+def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 78;
+  let ResourceCycles = [78];
 }
+def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
+
+def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 79;
+  let ResourceCycles = [79];
+}
+def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
+                                          "LRETI?(L|Q|W)")>;
+
+def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 92;
+  let ResourceCycles = [92];
+}
+def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
+
+def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 94;
+  let ResourceCycles = [94];
+}
+def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
+
+def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 99;
+  let ResourceCycles = [99];
+}
+def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
+
+def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 121;
+  let ResourceCycles = [121];
+}
+def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
+
+def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 125;
+  let ResourceCycles = [125];
+}
+def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm, SQRTPDr, SQRTPDm)>;
+
+def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 127;
+  let ResourceCycles = [127];
+}
+def : InstRW<[AtomWrite01_127], (instrs INT)>;
+
+def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 130;
+  let ResourceCycles = [130];
+}
+def : InstRW<[AtomWrite01_130], (instrs INT3)>;
+
+def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 140;
+  let ResourceCycles = [140];
+}
+def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
+
+def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 141;
+  let ResourceCycles = [141];
+}
+def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
+
+def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 146;
+  let ResourceCycles = [146];
+}
+def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
+
+def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 147;
+  let ResourceCycles = [147];
+}
+def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
+
+def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 168;
+  let ResourceCycles = [168];
+}
+def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
+
+def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 174;
+  let ResourceCycles = [174];
+}
+def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
+def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
+
+def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 183;
+  let ResourceCycles = [183];
+}
+def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
+
+def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 202;
+  let ResourceCycles = [202];
+}
+def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
+
+} // SchedModel

Modified: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll Wed Apr 11 11:23:01 2018
@@ -7,8 +7,10 @@
 ; CHECK-NEXT: jne
 
 ; ATOM-LABEL: t:
-; ATOM: movl (%r9,%r{{.+}},4), %e{{..}}
+; ATOM: movl (%r9,%r{{.+}},4), %r{{..}}
+; ATOM-NEXT: xorl
 ; ATOM-NEXT: testq
+; ATOM-NEXT: movl
 ; ATOM-NEXT: jne
 
 @Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]

Modified: llvm/trunk/test/CodeGen/X86/lsr-static-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-static-addr.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-static-addr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-static-addr.ll Wed Apr 11 11:23:01 2018
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
-; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck -check-prefix=ATOM %s
+; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
 
 ; CHECK: xorl  %eax, %eax
 ; CHECK: movsd .LCPI0_0(%rip), %xmm0
@@ -10,16 +10,6 @@
 ; CHECK-NEXT: movsd
 ; CHECK-NEXT: incq %rax
 
-
-; ATOM: movsd .LCPI0_0(%rip), %xmm0
-; ATOM: xorl  %eax, %eax
-; ATOM: align
-; ATOM-NEXT: BB0_2:
-; ATOM-NEXT: movsd A(,%rax,8)
-; ATOM-NEXT: mulsd
-; ATOM-NEXT: movsd
-; ATOM-NEXT: incq %rax
-
 @A = external global [0 x double]
 
 define void @foo(i64 %n) nounwind {

Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Wed Apr 11 11:23:01 2018
@@ -647,11 +647,11 @@ define i32 @test_movd(x86_mmx %a0, i32 %
 ;
 ; ATOM-LABEL: test_movd:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movd (%rsi), %mm1 # sched: [1:1.00]
-; ATOM-NEXT:    movd %edi, %mm2 # sched: [1:1.00]
-; ATOM-NEXT:    paddd %mm2, %mm1 # sched: [1:0.50]
-; ATOM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm1, %ecx # sched: [3:3.00]
+; ATOM-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
+; ATOM-NEXT:    movd (%rsi), %mm2 # sched: [1:1.00]
+; ATOM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
+; ATOM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
+; ATOM-NEXT:    movd %mm2, %ecx # sched: [3:3.00]
 ; ATOM-NEXT:    movd %mm0, %eax # sched: [3:3.00]
 ; ATOM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
@@ -3509,8 +3509,8 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ;
 ; ATOM-LABEL: test_pinsrw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movswl (%rsi), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
+; ATOM-NEXT:    movswl (%rsi), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll Wed Apr 11 11:23:01 2018
@@ -1220,7 +1220,7 @@ define void @test_into() optsize {
 ; ATOM-LABEL: test_into:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    into # sched: [0:?]
+; ATOM-NEXT:    into # sched: [6:3.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Wed Apr 11 11:23:01 2018
@@ -15737,7 +15737,7 @@ define void @test_ud2() optsize {
 ; ATOM-LABEL: test_ud2:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    ud2 # sched: [0:?]
+; ATOM-NEXT:    ud2 # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;

Modified: llvm/trunk/test/CodeGen/X86/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select.ll Wed Apr 11 11:23:01 2018
@@ -104,14 +104,23 @@ declare i1 @return_false()
 
 ;; Select between two floating point constants.
 define float @test3(i32 %x) nounwind readnone {
-; CHECK-LABEL: test3:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rcx
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test3:
+; GENERIC:       ## %bb.0: ## %entry
+; GENERIC-NEXT:    xorl %eax, %eax
+; GENERIC-NEXT:    testl %edi, %edi
+; GENERIC-NEXT:    sete %al
+; GENERIC-NEXT:    leaq {{.*}}(%rip), %rcx
+; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test3:
+; ATOM:       ## %bb.0: ## %entry
+; ATOM-NEXT:    xorl %eax, %eax
+; ATOM-NEXT:    leaq {{.*}}(%rip), %rcx
+; ATOM-NEXT:    testl %edi, %edi
+; ATOM-NEXT:    sete %al
+; ATOM-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT:    retq
 ;
 ; MCU-LABEL: test3:
 ; MCU:       # %bb.0: # %entry
@@ -266,15 +275,25 @@ define void @test6(i32 %C, <4 x float>*
 
 ; Select with fp80's
 define x86_fp80 @test7(i32 %tmp8) nounwind {
-; CHECK-LABEL: test7:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    setns %al
-; CHECK-NEXT:    shlq $4, %rax
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rcx
-; CHECK-NEXT:    fldt (%rax,%rcx)
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test7:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    xorl %eax, %eax
+; GENERIC-NEXT:    testl %edi, %edi
+; GENERIC-NEXT:    setns %al
+; GENERIC-NEXT:    shlq $4, %rax
+; GENERIC-NEXT:    leaq {{.*}}(%rip), %rcx
+; GENERIC-NEXT:    fldt (%rax,%rcx)
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test7:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    xorl %eax, %eax
+; ATOM-NEXT:    leaq {{.*}}(%rip), %rcx
+; ATOM-NEXT:    testl %edi, %edi
+; ATOM-NEXT:    setns %al
+; ATOM-NEXT:    shlq $4, %rax
+; ATOM-NEXT:    fldt (%rax,%rcx)
+; ATOM-NEXT:    retq
 ;
 ; MCU-LABEL: test7:
 ; MCU:       # %bb.0:
@@ -330,31 +349,32 @@ define void @test8(i1 %c, <6 x i32>* %ds
 ; ATOM-NEXT:    testb $1, %dil
 ; ATOM-NEXT:    jne LBB7_1
 ; ATOM-NEXT:  ## %bb.2:
-; ATOM-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; ATOM-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; ATOM-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; ATOM-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    jmp LBB7_3
 ; ATOM-NEXT:  LBB7_1:
-; ATOM-NEXT:    movd %r9d, %xmm0
+; ATOM-NEXT:    movd %r9d, %xmm1
 ; ATOM-NEXT:    movd %r8d, %xmm2
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; ATOM-NEXT:    movd %ecx, %xmm3
 ; ATOM-NEXT:    movd %edx, %xmm0
-; ATOM-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; ATOM-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ATOM-NEXT:  LBB7_3:
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
 ; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; ATOM-NEXT:    pcmpeqd %xmm2, %xmm2
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; ATOM-NEXT:    paddd %xmm2, %xmm0
 ; ATOM-NEXT:    paddd %xmm2, %xmm1
-; ATOM-NEXT:    movdqa %xmm0, (%rsi)
 ; ATOM-NEXT:    movq %xmm1, 16(%rsi)
+; ATOM-NEXT:    movdqa %xmm0, (%rsi)
 ; ATOM-NEXT:    retq
 ;
 ; MCU-LABEL: test8:
@@ -634,8 +654,8 @@ define noalias i8* @test12(i64 %count) n
 ; ATOM:       ## %bb.0: ## %entry
 ; ATOM-NEXT:    movq %rdi, %rax
 ; ATOM-NEXT:    movl $4, %ecx
-; ATOM-NEXT:    mulq %rcx
 ; ATOM-NEXT:    movq $-1, %rdi
+; ATOM-NEXT:    mulq %rcx
 ; ATOM-NEXT:    cmovnoq %rax, %rdi
 ; ATOM-NEXT:    jmp __Znam ## TAILCALL
 ;
@@ -894,8 +914,8 @@ define void @clamp_i8(i32 %src, i8* %dst
 ; ATOM:       ## %bb.0:
 ; ATOM-NEXT:    cmpl $127, %edi
 ; ATOM-NEXT:    movl $127, %eax
-; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    movb $-128, %cl
+; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    cmpl $-128, %eax
 ; ATOM-NEXT:    jl LBB22_2
 ; ATOM-NEXT:  ## %bb.1:
@@ -946,8 +966,8 @@ define void @clamp(i32 %src, i16* %dst)
 ; ATOM:       ## %bb.0:
 ; ATOM-NEXT:    cmpl $32767, %edi ## imm = 0x7FFF
 ; ATOM-NEXT:    movl $32767, %eax ## imm = 0x7FFF
-; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    movl $32768, %ecx ## imm = 0x8000
+; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    cmpl $-32768, %eax ## imm = 0x8000
 ; ATOM-NEXT:    cmovgel %eax, %ecx
 ; ATOM-NEXT:    movw %cx, (%rsi)

Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Wed Apr 11 11:23:01 2018
@@ -6133,8 +6133,6 @@ define <4 x float> @test_fnop() nounwind
 ; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_fnop:

Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Wed Apr 11 11:23:01 2018
@@ -4670,10 +4670,10 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
 ; ATOM-NEXT:    movq %rdi, %xmm2 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm0, %xmm2 # sched: [2:1.00]
 ; ATOM-NEXT:    paddq %xmm0, %xmm1 # sched: [2:1.00]
-; ATOM-NEXT:    movq %xmm2, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    paddq %xmm0, %xmm2 # sched: [2:1.00]
 ; ATOM-NEXT:    movq %xmm1, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %xmm2, (%rsi) # sched: [1:1.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movd_64:
@@ -10447,10 +10447,11 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ;
 ; ATOM-LABEL: test_pshufd:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
-; ATOM-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; ATOM-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00]
+; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshufd:
@@ -10575,10 +10576,11 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ;
 ; ATOM-LABEL: test_pshufhw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
-; ATOM-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; ATOM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; ATOM-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
+; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshufhw:
@@ -10703,10 +10705,11 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ;
 ; ATOM-LABEL: test_pshuflw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; ATOM-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
+; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshuflw:

Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Wed Apr 11 11:23:01 2018
@@ -899,10 +899,9 @@ define <2 x double> @test_movddup(<2 x d
 ;
 ; ATOM-LABEL: test_movddup:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0] sched: [1:1.00]
-; ATOM-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; ATOM-NEXT:    subpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; ATOM-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
+; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movddup:
@@ -1027,10 +1026,9 @@ define <4 x float> @test_movshdup(<4 x f
 ;
 ; ATOM-LABEL: test_movshdup:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
+; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movshdup:
@@ -1155,10 +1153,9 @@ define <4 x float> @test_movsldup(<4 x f
 ;
 ; ATOM-LABEL: test_movsldup:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
+; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movsldup:

Modified: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll Wed Apr 11 11:23:01 2018
@@ -29,10 +29,11 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ;
 ; ATOM-LABEL: test_pabsb:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pabsb (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    pabsb %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT:    pabsb (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsb:
@@ -157,10 +158,11 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ;
 ; ATOM-LABEL: test_pabsd:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pabsd (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    pabsd %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT:    pabsd (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsd:
@@ -285,10 +287,11 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ;
 ; ATOM-LABEL: test_pabsw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pabsw (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    pabsw %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT:    pabsw (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsw:

Modified: llvm/trunk/test/CodeGen/X86/x87-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87-schedule.ll?rev=329837&r1=329836&r2=329837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll Wed Apr 11 11:23:01 2018
@@ -177,10 +177,10 @@ define void @test_fadd(float *%a0, doubl
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fadd %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fadd %st(2) # sched: [0:?]
-; ATOM-NEXT:    fadds (%ecx) # sched: [0:?]
-; ATOM-NEXT:    faddl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fadd %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fadd %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fadds (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    faddl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -301,10 +301,10 @@ define void @test_faddp_fiadd(i16 *%a0,
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    faddp %st(1) # sched: [0:?]
-; ATOM-NEXT:    faddp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fiadds (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fiaddl (%eax) # sched: [0:?]
+; ATOM-NEXT:    faddp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    faddp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fiadds (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fiaddl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -421,8 +421,8 @@ define void @test_fbld_fbstp(i8* %a0) op
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fbld (%eax) # sched: [0:?]
-; ATOM-NEXT:    fbstp (%eax) # sched: [0:?]
+; ATOM-NEXT:    fbld (%eax) # sched: [100:0.50]
+; ATOM-NEXT:    fbstp (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -895,10 +895,10 @@ define void @test_fcom(float *%a0, doubl
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fcom %st(1) # sched: [0:?]
-; ATOM-NEXT:    fcom %st(3) # sched: [0:?]
-; ATOM-NEXT:    fcoms (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fcoml (%eax) # sched: [0:?]
+; ATOM-NEXT:    fcom %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fcom %st(3) # sched: [5:5.00]
+; ATOM-NEXT:    fcoms (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fcoml (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1020,10 +1020,10 @@ define void @test_fcomp_fcompp(float *%a
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fcomp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fcomp %st(3) # sched: [0:?]
-; ATOM-NEXT:    fcomps (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fcompl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fcomp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fcomp %st(3) # sched: [5:5.00]
+; ATOM-NEXT:    fcomps (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fcompl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    fcompp # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
@@ -1385,10 +1385,10 @@ define void @test_fdiv(float *%a0, doubl
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdiv %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdiv %st(2) # sched: [0:?]
-; ATOM-NEXT:    fdivs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fdivl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdiv %st(0), %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdiv %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdivs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fdivl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1509,10 +1509,10 @@ define void @test_fdivp_fidiv(i16 *%a0,
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdivp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fidivs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fidivl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdivp %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivp %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fidivs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fidivl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1633,10 +1633,10 @@ define void @test_fdivr(float *%a0, doub
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivr %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdivr %st(2) # sched: [0:?]
-; ATOM-NEXT:    fdivrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fdivrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdivr %st(0), %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivr %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1757,10 +1757,10 @@ define void @test_fdivrp_fidivr(i16 *%a0
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivrp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdivrp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fidivrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fidivrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdivrp %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrp %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fidivrs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fidivrl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1955,10 +1955,10 @@ define void @test_ficom(i16 *%a0, i32 *%
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    ficoms (%ecx) # sched: [0:?]
-; ATOM-NEXT:    ficoml (%eax) # sched: [0:?]
-; ATOM-NEXT:    ficomps (%ecx) # sched: [0:?]
-; ATOM-NEXT:    ficompl (%eax) # sched: [0:?]
+; ATOM-NEXT:    ficoms (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    ficoml (%eax) # sched: [5:5.00]
+; ATOM-NEXT:    ficomps (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    ficompl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -2740,7 +2740,7 @@ define void @test_fldcw_fldenv(i8* %a0)
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
 ; ATOM-NEXT:    fldcw (%eax) # sched: [5:2.50]
-; ATOM-NEXT:    fldenv (%eax) # sched: [0:?]
+; ATOM-NEXT:    fldenv (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -2961,10 +2961,10 @@ define void @test_fmul(float *%a0, doubl
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fmul %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fmul %st(2) # sched: [0:?]
-; ATOM-NEXT:    fmuls (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fmull (%eax) # sched: [0:?]
+; ATOM-NEXT:    fmul %st(0), %st(1) # sched: [4:4.00]
+; ATOM-NEXT:    fmul %st(2) # sched: [4:4.00]
+; ATOM-NEXT:    fmuls (%ecx) # sched: [4:4.00]
+; ATOM-NEXT:    fmull (%eax) # sched: [4:4.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3085,10 +3085,10 @@ define void @test_fmulp_fimul(i16 *%a0,
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fmulp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fmulp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fimuls (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fimull (%eax) # sched: [0:?]
+; ATOM-NEXT:    fmulp %st(1) # sched: [4:4.00]
+; ATOM-NEXT:    fmulp %st(2) # sched: [4:4.00]
+; ATOM-NEXT:    fimuls (%ecx) # sched: [4:4.00]
+; ATOM-NEXT:    fimull (%eax) # sched: [4:4.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3584,7 +3584,7 @@ define void @test_frstor(i8* %a0) optsiz
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    frstor (%eax) # sched: [0:?]
+; ATOM-NEXT:    frstor (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3670,7 +3670,7 @@ define void @test_fsave(i8* %a0) optsize
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
 ; ATOM-NEXT:    wait # sched: [1:0.50]
-; ATOM-NEXT:    fnsave (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnsave (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3762,7 +3762,7 @@ define void @test_fnsave(i8* %a0) optsiz
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fnsave (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnsave (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4314,9 +4314,9 @@ define void @test_fstcw_fstenv_fstsw(i8*
 ; ATOM-NEXT:    wait # sched: [1:0.50]
 ; ATOM-NEXT:    fnstcw (%eax) # sched: [8:4.00]
 ; ATOM-NEXT:    wait # sched: [1:0.50]
-; ATOM-NEXT:    fnstenv (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnstenv (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    wait # sched: [1:0.50]
-; ATOM-NEXT:    fnstsw (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnstsw (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4443,8 +4443,8 @@ define void @test_fnstcw_fnstenv_fnstsw(
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
 ; ATOM-NEXT:    fnstcw (%eax) # sched: [8:4.00]
-; ATOM-NEXT:    fnstenv (%eax) # sched: [0:?]
-; ATOM-NEXT:    fnstsw (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnstenv (%eax) # sched: [100:0.50]
+; ATOM-NEXT:    fnstsw (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4549,10 +4549,10 @@ define void @test_fsub(float *%a0, doubl
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsub %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsub %st(2) # sched: [0:?]
-; ATOM-NEXT:    fsubs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fsubl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsub %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsub %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsubs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fsubl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4673,10 +4673,10 @@ define void @test_fsubp_fisub(i16 *%a0,
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsubp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fisubs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fisubl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsubp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fisubs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fisubl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4797,10 +4797,10 @@ define void @test_fsubr(float *%a0, doub
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubr %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsubr %st(2) # sched: [0:?]
-; ATOM-NEXT:    fsubrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fsubrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsubr %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubr %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4921,10 +4921,10 @@ define void @test_fsubrp_fisubr(i16 *%a0
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubrp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsubrp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fisubrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fisubrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsubrp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fisubrs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fisubrl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;




More information about the llvm-commits mailing list