[llvm] r190717 - Adds support for Atom Silvermont (SLM) - -march=slm

Hal Finkel hfinkel at anl.gov
Fri Sep 13 12:39:52 PDT 2013


----- Original Message -----
> Author: pgurd
> Date: Fri Sep 13 14:23:28 2013
> New Revision: 190717
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=190717&view=rev
> Log:
> Adds support for Atom Silvermont (SLM) - -march=slm
> 
> Implements Instruction scheduler latencies for Silvermont,
> using latencies from the Intel Silvermont Optimization Guide.

Just out of curiosity, when you have this:
+  InstrItinData<IIC_SHD16_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,

do you intend this to mean that the shift occupies the IEC_RSV0 unit, and nothing else can use it for 2 cycles? Or do you mean that the latency is 2 cycles, but you can still issue back-to-back independent shifts?

 -Hal

> 
> Auto detects SLM.
> 
> Turns on post RA scheduler when generating code for SLM.
> 
> 
> 
> Added:
>     llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
> Modified:
>     llvm/trunk/lib/Support/Host.cpp
>     llvm/trunk/lib/Target/X86/X86.td
>     llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>     llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
>     llvm/trunk/lib/Target/X86/X86InstrExtension.td
>     llvm/trunk/lib/Target/X86/X86InstrFormats.td
>     llvm/trunk/lib/Target/X86/X86InstrInfo.td
>     llvm/trunk/lib/Target/X86/X86InstrMMX.td
>     llvm/trunk/lib/Target/X86/X86InstrSSE.td
>     llvm/trunk/lib/Target/X86/X86Schedule.td
>     llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
>     llvm/trunk/lib/Target/X86/X86Subtarget.cpp
>     llvm/trunk/lib/Target/X86/X86Subtarget.h
> 
> Modified: llvm/trunk/lib/Support/Host.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Host.cpp?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Support/Host.cpp (original)
> +++ llvm/trunk/lib/Support/Host.cpp Fri Sep 13 14:23:28 2013
> @@ -265,7 +265,8 @@ std::string sys::getHostCPUName() {
>        case 54: // 32 nm Atom Midview
>          return "atom";
>  
> -      case 55: // Intel Atom Silvermont processors
> +      // Atom Silvermont codes from the Intel software optimization
> guide.
> +      case 55:
>        case 74:
>        case 77:
>          return "slm";
> 
> Modified: llvm/trunk/lib/Target/X86/X86.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86.td (original)
> +++ llvm/trunk/lib/Target/X86/X86.td Fri Sep 13 14:23:28 2013
> @@ -166,10 +166,17 @@ include "X86Schedule.td"
>  
>  def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily",
>  "IntelAtom",
>                      "Intel Atom processors">;
> +def ProcIntelSLM  : SubtargetFeature<"slm", "X86ProcFamily",
> "IntelSLM",
> +                    "Intel Silvermont processors">;
>  
>  class Proc<string Name, list<SubtargetFeature> Features>
>   : ProcessorModel<Name, GenericModel, Features>;
>  
> +//class AtomProc<string Name, list<SubtargetFeature> Features>
> +// : ProcessorModel<Name, AtomModel, Features>;
> +//class SLMProc<string Name, list<SubtargetFeature> Features>
> +// : ProcessorModel<Name, SLMModel, Features>;
> +
>  def : Proc<"generic",         []>;
>  def : Proc<"i386",            []>;
>  def : Proc<"i486",            []>;
> @@ -209,15 +216,14 @@ def : ProcessorModel<"atom", AtomModel,
>                        FeatureLEAUsesAG,
>                        FeaturePadShortFunctions]>;
>  
> -// Silvermont.
> -def : ProcessorModel<"slm", AtomModel,
> -                     [ProcIntelAtom, FeatureSSE42,
> FeatureCMPXCHG16B,
> -                      FeatureMOVBE, FeatureSlowBTMem,
> FeatureLeaForSP,
> -                      FeatureSlowDivide,
> -                      FeatureCallRegIndirect,
> -                      FeatureLEAUsesAG,
> -                      FeaturePadShortFunctions]>;
> -
> +// Atom Silvermont.
> +def : ProcessorModel<"slm",  SLMModel, [ProcIntelSLM,
> +                               FeatureSSE42, FeatureCMPXCHG16B,
> +                               FeatureMOVBE, FeaturePOPCNT,
> +                               FeaturePCLMUL, FeatureAES,
> +                               FeatureCallRegIndirect,
> +                               FeaturePRFCHW,
> +                               FeatureSlowBTMem]>;
>  // "Arrandale" along with corei3 and corei5
>  def : ProcessorModel<"corei7", SandyBridgeModel,
>                       [FeatureSSE42, FeatureCMPXCHG16B,
>                       FeatureSlowBTMem,
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Sep 13 14:23:28
> 2013
> @@ -642,12 +642,12 @@ multiclass avx512_icmp_packed<bits<8> op
>               (outs KRC:$dst), (ins RC:$src1, RC:$src2),
>               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst,
>               $src1, $src2}"),
>               [(set KRC:$dst, (OpNode (vt RC:$src1), (vt
>               RC:$src2)))],
> -             IIC_SSE_CMPP_RR>, EVEX_4V;
> +             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
>    def rm : AVX512BI<opc, MRMSrcMem,
>               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
>               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst,
>               $src1, $src2}"),
>               [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag
>               addr:$src2)))],
> -             IIC_SSE_CMPP_RM>, EVEX_4V;
> +             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
>  }
>  
>  defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512,
>  i512mem,
> @@ -677,19 +677,19 @@ multiclass avx512_icmp_cc<bits<8> opc, R
>    def rri : AVX512AIi8<opc, MRMSrcReg,
>               (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
>               [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2),
>               imm:$cc))],
> -             IIC_SSE_CMPP_RR>, EVEX_4V;
> +             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
>    def rmi : AVX512AIi8<opc, MRMSrcMem,
>               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2,
>               CC:$cc), asm,
>               [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag
>               addr:$src2),
> -                              imm:$cc))], IIC_SSE_CMPP_RM>, EVEX_4V;
> +                              imm:$cc))], IIC_SSE_ALU_F32P_RM>,
> EVEX_4V;
>    // Accept explicit immediate argument form instead of comparison
>    code.
>    let neverHasSideEffects = 1 in {
>      def rri_alt : AVX512AIi8<opc, MRMSrcReg,
>                 (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
> -               asm_alt, [], IIC_SSE_CMPP_RR>, EVEX_4V;
> +               asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
>      def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
>                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2,
>                 i8imm:$cc),
> -               asm_alt, [], IIC_SSE_CMPP_RM>, EVEX_4V;
> +               asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
>    }
>  }
>  
> @@ -732,10 +732,10 @@ multiclass avx512_cmp_packed<RegisterCla
>    let neverHasSideEffects = 1 in {
>      def rri_alt : PIi8<0xC2, MRMSrcReg,
>                 (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
> -               asm_alt, [], IIC_SSE_CMPP_RR, d>;
> +               asm_alt, [], IIC_SSE_ALU_F32P_RR, d>;
>      def rmi_alt : PIi8<0xC2, MRMSrcMem,
>                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2,
>                 i8imm:$cc),
> -               asm_alt, [], IIC_SSE_CMPP_RM, d>;
> +               asm_alt, [], IIC_SSE_ALU_F32P_RM, d>;
>    }
>  }
>  
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrArithmetic.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Fri Sep 13
> 14:23:28 2013
> @@ -726,20 +726,25 @@ class BinOpRR_RFF<bits<8> opcode, string
>    : BinOpRR<opcode, mnemonic, typeinfo, (outs
>    typeinfo.RegClass:$dst),
>              [(set typeinfo.RegClass:$dst, EFLAGS,
>                    (opnode typeinfo.RegClass:$src1,
>                    typeinfo.RegClass:$src2,
> -                          EFLAGS))], IIC_BIN_NONMEM>;
> +                          EFLAGS))], IIC_BIN_CARRY_NONMEM>;
>  
>  // BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed
>  encoding).
> -class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo
> typeinfo>
> +class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo
> typeinfo,
> +                 InstrItinClass itin = IIC_BIN_NONMEM>
>    : ITy<opcode, MRMSrcReg, typeinfo,
>          (outs typeinfo.RegClass:$dst),
>          (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
> -        mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
> +        mnemonic, "{$src2, $dst|$dst, $src2}", [], itin>,
>      Sched<[WriteALU]> {
>    // The disassembler should know about this, but not the asmparser.
>    let isCodeGenOnly = 1;
>    let hasSideEffects = 0;
>  }
>  
> +// BinOpRR_RDD_Rev - Instructions like "adc reg, reg, reg" (reversed
> encoding).
> +class BinOpRR_RFF_Rev<bits<8> opcode, string mnemonic, X86TypeInfo
> typeinfo>
> +  : BinOpRR_Rev<opcode, mnemonic, typeinfo, IIC_BIN_CARRY_NONMEM>;
> +
>  // BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed
>  encoding).
>  class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo
>  typeinfo>
>    : ITy<opcode, MRMSrcReg, typeinfo, (outs),
> @@ -753,10 +758,11 @@ class BinOpRR_F_Rev<bits<8> opcode, stri
>  
>  // BinOpRM - Instructions like "add reg, reg, [mem]".
>  class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
> -              dag outlist, list<dag> pattern>
> +              dag outlist, list<dag> pattern,
> +              InstrItinClass itin = IIC_BIN_MEM>
>    : ITy<opcode, MRMSrcMem, typeinfo, outlist,
>          (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
> -        mnemonic, "{$src2, $src1|$src1, $src2}", pattern,
> IIC_BIN_NONMEM>,
> +        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
>      Sched<[WriteALULd, ReadAfterLd]>;
>  
>  // BinOpRM_R - Instructions like "add reg, reg, [mem]".
> @@ -786,14 +792,15 @@ class BinOpRM_RFF<bits<8> opcode, string
>    : BinOpRM<opcode, mnemonic, typeinfo, (outs
>    typeinfo.RegClass:$dst),
>              [(set typeinfo.RegClass:$dst, EFLAGS,
>              (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode
>              addr:$src2),
> -                    EFLAGS))]>;
> +                    EFLAGS))], IIC_BIN_CARRY_MEM>;
>  
>  // BinOpRI - Instructions like "add reg, reg, imm".
>  class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
> -              Format f, dag outlist, list<dag> pattern>
> +              Format f, dag outlist, list<dag> pattern,
> +              InstrItinClass itin = IIC_BIN_NONMEM>
>    : ITy<opcode, f, typeinfo, outlist,
>          (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
> -        mnemonic, "{$src2, $src1|$src1, $src2}", pattern,
> IIC_BIN_NONMEM>,
> +        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
>      Sched<[WriteALU]> {
>    let ImmT = typeinfo.ImmEncoding;
>  }
> @@ -824,14 +831,15 @@ class BinOpRI_RFF<bits<8> opcode, string
>    : BinOpRI<opcode, mnemonic, typeinfo, f, (outs
>    typeinfo.RegClass:$dst),
>              [(set typeinfo.RegClass:$dst, EFLAGS,
>                  (opnode typeinfo.RegClass:$src1,
>                  typeinfo.ImmOperator:$src2,
> -                        EFLAGS))]>;
> +                        EFLAGS))], IIC_BIN_CARRY_NONMEM>;
>  
>  // BinOpRI8 - Instructions like "add reg, reg, imm8".
>  class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo
>  typeinfo,
> -               Format f, dag outlist, list<dag> pattern>
> +               Format f, dag outlist, list<dag> pattern,
> +               InstrItinClass itin = IIC_BIN_NONMEM>
>    : ITy<opcode, f, typeinfo, outlist,
>          (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
> -        mnemonic, "{$src2, $src1|$src1, $src2}", pattern,
> IIC_BIN_NONMEM>,
> +        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
>      Sched<[WriteALU]> {
>    let ImmT = Imm8; // Always 8-bit immediate.
>  }
> @@ -863,14 +871,14 @@ class BinOpRI8_RFF<bits<8> opcode, strin
>    : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs
>    typeinfo.RegClass:$dst),
>               [(set typeinfo.RegClass:$dst, EFLAGS,
>                 (opnode typeinfo.RegClass:$src1,
>                 typeinfo.Imm8Operator:$src2,
> -                       EFLAGS))]>;
> +                       EFLAGS))], IIC_BIN_CARRY_NONMEM>;
>  
>  // BinOpMR - Instructions like "add [mem], reg".
>  class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
> -              list<dag> pattern>
> +              list<dag> pattern, InstrItinClass itin = IIC_BIN_MEM>
>    : ITy<opcode, MRMDestMem, typeinfo,
>          (outs), (ins typeinfo.MemOperand:$dst,
>          typeinfo.RegClass:$src),
> -        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
> +        mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
>      Sched<[WriteALULd, WriteRMW]>;
>  
>  // BinOpMR_RMW - Instructions like "add [mem], reg".
> @@ -886,7 +894,7 @@ class BinOpMR_RMW_FF<bits<8> opcode, str
>    : BinOpMR<opcode, mnemonic, typeinfo,
>            [(store (opnode (load addr:$dst), typeinfo.RegClass:$src,
>            EFLAGS),
>                    addr:$dst),
> -           (implicit EFLAGS)]>;
> +           (implicit EFLAGS)], IIC_BIN_CARRY_MEM>;
>  
>  // BinOpMR_F - Instructions like "cmp [mem], reg".
>  class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo
>  typeinfo,
> @@ -896,10 +904,11 @@ class BinOpMR_F<bits<8> opcode, string m
>  
>  // BinOpMI - Instructions like "add [mem], imm".
>  class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
> -              Format f, list<dag> pattern, bits<8> opcode = 0x80>
> +              Format f, list<dag> pattern, bits<8> opcode = 0x80,
> +              InstrItinClass itin = IIC_BIN_MEM>
>    : ITy<opcode, f, typeinfo,
>          (outs), (ins typeinfo.MemOperand:$dst,
>          typeinfo.ImmOperand:$src),
> -        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
> +        mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
>      Sched<[WriteALULd, WriteRMW]> {
>    let ImmT = typeinfo.ImmEncoding;
>  }
> @@ -917,7 +926,7 @@ class BinOpMI_RMW_FF<string mnemonic, X8
>    : BinOpMI<mnemonic, typeinfo, f,
>              [(store (opnode (typeinfo.VT (load addr:$dst)),
>                              typeinfo.ImmOperator:$src, EFLAGS),
>                              addr:$dst),
> -             (implicit EFLAGS)]>;
> +             (implicit EFLAGS)], 0x80, IIC_BIN_CARRY_MEM>;
>  
>  // BinOpMI_F - Instructions like "cmp [mem], imm".
>  class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
> @@ -929,10 +938,11 @@ class BinOpMI_F<string mnemonic, X86Type
>  
>  // BinOpMI8 - Instructions like "add [mem], imm8".
>  class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
> -               Format f, list<dag> pattern>
> +               Format f, list<dag> pattern,
> +               InstrItinClass itin = IIC_BIN_MEM>
>    : ITy<0x82, f, typeinfo,
>          (outs), (ins typeinfo.MemOperand:$dst,
>          typeinfo.Imm8Operand:$src),
> -        mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
> +        mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
>      Sched<[WriteALULd, WriteRMW]> {
>    let ImmT = Imm8; // Always 8-bit immediate.
>  }
> @@ -951,7 +961,7 @@ class BinOpMI8_RMW_FF<string mnemonic, X
>    : BinOpMI8<mnemonic, typeinfo, f,
>               [(store (opnode (load addr:$dst),
>                               typeinfo.Imm8Operator:$src, EFLAGS),
>                               addr:$dst),
> -              (implicit EFLAGS)]>;
> +              (implicit EFLAGS)], IIC_BIN_CARRY_MEM>;
>  
>  // BinOpMI8_F - Instructions like "cmp [mem], imm8".
>  class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
> @@ -962,10 +972,11 @@ class BinOpMI8_F<string mnemonic, X86Typ
>  
>  // BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def
>  EFLAGS.
>  class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
> -              Register areg, string operands>
> +              Register areg, string operands,
> +              InstrItinClass itin = IIC_BIN_NONMEM>
>    : ITy<opcode, RawFrm, typeinfo,
>          (outs), (ins typeinfo.ImmOperand:$src),
> -        mnemonic, operands, []>, Sched<[WriteALU]> {
> +        mnemonic, operands, [], itin>, Sched<[WriteALU]> {
>    let ImmT = typeinfo.ImmEncoding;
>    let Uses = [areg];
>    let Defs = [areg, EFLAGS];
> @@ -976,7 +987,8 @@ class BinOpAI<bits<8> opcode, string mne
>  // and use EFLAGS.
>  class BinOpAI_FF<bits<8> opcode, string mnemonic, X86TypeInfo
>  typeinfo,
>                  Register areg, string operands>
> -  : BinOpAI<opcode, mnemonic, typeinfo, areg, operands> {
> +  : BinOpAI<opcode, mnemonic, typeinfo, areg, operands,
> +            IIC_BIN_CARRY_NONMEM> {
>    let Uses = [areg, EFLAGS];
>  }
>  
> @@ -1070,10 +1082,10 @@ multiclass ArithBinOp_RFF<bits<8> BaseOp
>          def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64,
>          opnode>;
>        } // isCommutable
>  
> -      def NAME#8rr_REV  : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
> -      def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
> -      def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
> -      def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
> +      def NAME#8rr_REV  : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>;
> +      def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>;
> +      def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>;
> +      def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>;
>  
>        def NAME#8rm   : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 ,
>        opnode>;
>        def NAME#16rm  : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16,
>        opnode>;
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrExtension.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrExtension.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrExtension.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrExtension.td Fri Sep 13
> 14:23:28 2013
> @@ -14,26 +14,26 @@
>  let neverHasSideEffects = 1 in {
>    let Defs = [AX], Uses = [AL] in
>    def CBW : I<0x98, RawFrm, (outs), (ins),
> -              "{cbtw|cbw}", []>, OpSize;   // AX = signext(AL)
> +              "{cbtw|cbw}", [], IIC_CBW>, OpSize;   // AX =
> signext(AL)
>    let Defs = [EAX], Uses = [AX] in
>    def CWDE : I<0x98, RawFrm, (outs), (ins),
> -              "{cwtl|cwde}", []>;   // EAX = signext(AX)
> +              "{cwtl|cwde}", [], IIC_CBW>;   // EAX = signext(AX)
>  
>    let Defs = [AX,DX], Uses = [AX] in
>    def CWD : I<0x99, RawFrm, (outs), (ins),
> -              "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
> +              "{cwtd|cwd}", [], IIC_CBW>, OpSize; // DX:AX =
> signext(AX)
>    let Defs = [EAX,EDX], Uses = [EAX] in
>    def CDQ : I<0x99, RawFrm, (outs), (ins),
> -              "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
> +              "{cltd|cdq}", [], IIC_CBW>; // EDX:EAX = signext(EAX)
>  
>  
>    let Defs = [RAX], Uses = [EAX] in
>    def CDQE : RI<0x98, RawFrm, (outs), (ins),
> -               "{cltq|cdqe}", []>;     // RAX = signext(EAX)
> +               "{cltq|cdqe}", [], IIC_CBW>;     // RAX =
> signext(EAX)
>  
>    let Defs = [RAX,RDX], Uses = [RAX] in
>    def CQO  : RI<0x99, RawFrm, (outs), (ins),
> -                "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
> +                "{cqto|cqo}", [], IIC_CBW>; // RDX:RAX =
> signext(RAX)
>  }
>  
>  
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrFormats.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFormats.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFormats.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFormats.td Fri Sep 13 14:23:28
> 2013
> @@ -671,7 +671,7 @@ class AVX512FMA3<bits<8> o, Format F, da
>  // AES8I
>  // These use the same encoding as the SSE4.2 T8 and TA encodings.
>  class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
> -            list<dag>pattern, InstrItinClass itin = NoItinerary>
> +            list<dag>pattern, InstrItinClass itin = IIC_AES>
>        : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
>          Requires<[HasAES]>;
>  
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Sep 13 14:23:28
> 2013
> @@ -977,53 +977,56 @@ let Defs = [EFLAGS] in {
>  def BSF16rr  : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
>                   "bsf{w}\t{$src, $dst|$dst, $src}",
>                   [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
> -                  IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
> +                  IIC_BIT_SCAN_REG>, TB, OpSize,
> Sched<[WriteShift]>;
>  def BSF16rm  : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins
>  i16mem:$src),
>                   "bsf{w}\t{$src, $dst|$dst, $src}",
>                   [(set GR16:$dst, EFLAGS, (X86bsf (loadi16
>                   addr:$src)))],
> -                  IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
> +                  IIC_BIT_SCAN_MEM>, TB, OpSize,
> Sched<[WriteShiftLd]>;
>  def BSF32rr  : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
>                   "bsf{l}\t{$src, $dst|$dst, $src}",
> -                 [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))],
> IIC_BSF>, TB,
> +                 [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))],
> +                 IIC_BIT_SCAN_REG>, TB,
>                 Sched<[WriteShift]>;
>  def BSF32rm  : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins
>  i32mem:$src),
>                   "bsf{l}\t{$src, $dst|$dst, $src}",
>                   [(set GR32:$dst, EFLAGS, (X86bsf (loadi32
>                   addr:$src)))],
> -                 IIC_BSF>, TB, Sched<[WriteShiftLd]>;
> +                 IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
>  def BSF64rr  : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins
>  GR64:$src),
>                    "bsf{q}\t{$src, $dst|$dst, $src}",
>                    [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
> -                  IIC_BSF>, TB, Sched<[WriteShift]>;
> +                  IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>;
>  def BSF64rm  : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins
>  i64mem:$src),
>                    "bsf{q}\t{$src, $dst|$dst, $src}",
>                    [(set GR64:$dst, EFLAGS, (X86bsf (loadi64
>                    addr:$src)))],
> -                  IIC_BSF>, TB, Sched<[WriteShiftLd]>;
> +                  IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
>  
>  def BSR16rr  : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
>                   "bsr{w}\t{$src, $dst|$dst, $src}",
> -                 [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))],
> IIC_BSR>,
> +                 [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))],
> +                 IIC_BIT_SCAN_REG>,
>                   TB, OpSize, Sched<[WriteShift]>;
>  def BSR16rm  : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins
>  i16mem:$src),
>                   "bsr{w}\t{$src, $dst|$dst, $src}",
>                   [(set GR16:$dst, EFLAGS, (X86bsr (loadi16
>                   addr:$src)))],
> -                 IIC_BSR>, TB,
> +                 IIC_BIT_SCAN_MEM>, TB,
>                   OpSize, Sched<[WriteShiftLd]>;
>  def BSR32rr  : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
>                   "bsr{l}\t{$src, $dst|$dst, $src}",
> -                 [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))],
> IIC_BSR>, TB,
> +                 [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))],
> +                 IIC_BIT_SCAN_REG>, TB,
>                 Sched<[WriteShift]>;
>  def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins
>  i32mem:$src),
>                   "bsr{l}\t{$src, $dst|$dst, $src}",
>                   [(set GR32:$dst, EFLAGS, (X86bsr (loadi32
>                   addr:$src)))],
> -                 IIC_BSR>, TB, Sched<[WriteShiftLd]>;
> +                 IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
>  def BSR64rr  : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins
>  GR64:$src),
>                    "bsr{q}\t{$src, $dst|$dst, $src}",
> -                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))],
> IIC_BSR>, TB,
> +                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))],
> IIC_BIT_SCAN_REG>, TB,
>                 Sched<[WriteShift]>;
>  def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins
>  i64mem:$src),
>                    "bsr{q}\t{$src, $dst|$dst, $src}",
>                    [(set GR64:$dst, EFLAGS, (X86bsr (loadi64
>                    addr:$src)))],
> -                  IIC_BSR>, TB, Sched<[WriteShiftLd]>;
> +                  IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
>  } // Defs = [EFLAGS]
>  
>  let SchedRW = [WriteMicrocoded] in {
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Fri Sep 13 14:23:28 2013
> @@ -204,7 +204,7 @@ multiclass sse12_cvt_pint_3addr<bits<8>
>  //===----------------------------------------------------------------------===//
>  
>  def MMX_EMMS  : MMXI<0x77, RawFrm, (outs), (ins), "emms",
> -                     [(int_x86_mmx_emms)]>;
> +                     [(int_x86_mmx_emms)], IIC_MMX_EMMS>;
>  
>  //===----------------------------------------------------------------------===//
>  // MMX Scalar Instructions
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Sep 13 14:23:28 2013
> @@ -151,6 +151,34 @@ def SSE_MOVU_ITINS : OpndItins<
>    IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>  >;
>  
> +def SSE_DPPD_ITINS : OpndItins<
> +  IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
> +>;
> +
> +def SSE_DPPS_ITINS : OpndItins<
> +  IIC_SSE_DPPS_RR, IIC_SSE_DPPD_RM
> +>;
> +
> +def DEFAULT_ITINS : OpndItins<
> +  IIC_ALU_NONMEM, IIC_ALU_MEM
> +>;
> +
> +def SSE_EXTRACT_ITINS : OpndItins<
> +  IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
> +>;
> +
> +def SSE_INSERT_ITINS : OpndItins<
> +  IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
> +>;
> +
> +def SSE_MPSADBW_ITINS : OpndItins<
> +  IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
> +>;
> +
> +def SSE_PMULLD_ITINS : OpndItins<
> +  IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
> +>;
> +
>  //===----------------------------------------------------------------------===//
>  // SSE 1 & 2 Instructions Classes
>  //===----------------------------------------------------------------------===//
> @@ -2307,7 +2335,7 @@ let Constraints = "$src1 = $dst" in {
>    defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64,
>    loadf64,
>                    "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
>                    "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
> -                  SSE_ALU_F32S>, // same latency as 32 bit compare
> +                  SSE_ALU_F64S>,
>                    XD;
>  }
>  
> @@ -2342,7 +2370,7 @@ let Constraints = "$src1 = $dst" in {
>                         SSE_ALU_F32S>, XS;
>    defm Int_CMPSD  : sse12_cmp_scalar_int<f64mem, SSECC,
>    int_x86_sse2_cmp_sd,
>                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
> -                       SSE_ALU_F32S>, // same latency as f32
> +                       SSE_ALU_F64S>,
>                         XD;
>  }
>  
> @@ -2411,26 +2439,27 @@ let Defs = [EFLAGS] in {
>  // sse12_cmp_packed - sse 1 & 2 compare packed instructions
>  multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand
>  x86memop,
>                              Operand CC, Intrinsic Int, string asm,
> -                            string asm_alt, Domain d> {
> +                            string asm_alt, Domain d,
> +                            OpndItins itins = SSE_ALU_F32P> {
>    def rri : PIi8<0xC2, MRMSrcReg,
>               (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
>               [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
> -             IIC_SSE_CMPP_RR, d>,
> +             itins.rr, d>,
>              Sched<[WriteFAdd]>;
>    def rmi : PIi8<0xC2, MRMSrcMem,
>               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
>               asm,
>               [(set RC:$dst, (Int RC:$src1, (memop addr:$src2),
>               imm:$cc))],
> -             IIC_SSE_CMPP_RM, d>,
> +             itins.rm, d>,
>              Sched<[WriteFAddLd, ReadAfterLd]>;
>  
>    // Accept explicit immediate argument form instead of comparison
>    code.
>    let neverHasSideEffects = 1 in {
>      def rri_alt : PIi8<0xC2, MRMSrcReg,
>                 (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
> -               asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
> +               asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
>      def rmi_alt : PIi8<0xC2, MRMSrcMem,
>                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2,
>                 i8imm:$cc),
> -               asm_alt, [], IIC_SSE_CMPP_RM, d>,
> +               asm_alt, [], itins.rm, d>,
>                 Sched<[WriteFAddLd, ReadAfterLd]>;
>    }
>  }
> @@ -2455,11 +2484,11 @@ let Constraints = "$src1 = $dst" in {
>    defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC,
>    int_x86_sse_cmp_ps,
>                   "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
>                   "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
> -                 SSEPackedSingle>, TB;
> +                 SSEPackedSingle, SSE_ALU_F32P>, TB;
>    defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC,
>    int_x86_sse2_cmp_pd,
>                   "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
>                   "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
> -                 SSEPackedDouble>, TB, OpSize;
> +                 SSEPackedDouble, SSE_ALU_F64P>, TB, OpSize;
>  }
>  
>  let Predicates = [HasAVX] in {
> @@ -3830,7 +3859,7 @@ defm PAVGB   : PDI_binop_all_int<0xE0, "
>  defm PAVGW   : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
>                                   int_x86_avx2_pavg_w,
>                                   SSE_INTALU_ITINS_P, 1>;
>  defm PSADBW  : PDI_binop_all_int<0xF6, "psadbw",
>  int_x86_sse2_psad_bw,
> -                                 int_x86_avx2_psad_bw,
> SSE_INTALU_ITINS_P, 1>;
> +                                 int_x86_avx2_psad_bw, SSE_PMADD,
> 1>;
>  
>  let Predicates = [HasAVX] in
>  defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64,
>  v4i32, VR128,
> @@ -3974,12 +4003,14 @@ let ExeDomain = SSEPackedInt, SchedRW =
>                         (outs VR128:$dst), (ins VR128:$src1,
>                         i32i8imm:$src2),
>                         "pslldq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
> -                         (int_x86_sse2_psll_dq_bs VR128:$src1,
> imm:$src2))]>;
> +                         (int_x86_sse2_psll_dq_bs VR128:$src1,
> imm:$src2))],
> +                         IIC_SSE_INTSHDQ_P_RI>;
>    def PSRLDQri : PDIi8<0x73, MRM3r,
>                         (outs VR128:$dst), (ins VR128:$src1,
>                         i32i8imm:$src2),
>                         "psrldq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
> -                         (int_x86_sse2_psrl_dq_bs VR128:$src1,
> imm:$src2))]>;
> +                         (int_x86_sse2_psrl_dq_bs VR128:$src1,
> imm:$src2))],
> +                         IIC_SSE_INTSHDQ_P_RI>;
>    // PSRADQri doesn't exist in SSE[1-3].
>  }
>  } // Constraints = "$src1 = $dst"
> @@ -4063,14 +4094,14 @@ let Predicates = [HasAVX] in {
>                                   "\t{$src2, $src1, $dst|$dst, $src1,
>                                   $src2}"),
>                        [(set VR128:$dst,
>                          (vt128 (OpNode VR128:$src1, (i8
>                          imm:$src2))))],
> -                      IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
> +                      IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
>    def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
>                        (ins i128mem:$src1, i8imm:$src2),
>                        !strconcat("v", OpcodeStr,
>                                   "\t{$src2, $src1, $dst|$dst, $src1,
>                                   $src2}"),
>                       [(set VR128:$dst,
>                         (vt128 (OpNode (bitconvert (memopv2i64
>                         addr:$src1)),
> -                        (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
> +                        (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
>                    Sched<[WriteShuffleLd]>;
>  }
>  
> @@ -4081,14 +4112,14 @@ let Predicates = [HasAVX2] in {
>                                    "\t{$src2, $src1, $dst|$dst,
>                                    $src1, $src2}"),
>                         [(set VR256:$dst,
>                           (vt256 (OpNode VR256:$src1, (i8
>                           imm:$src2))))],
> -                       IIC_SSE_PSHUF>, VEX, VEX_L,
> Sched<[WriteShuffle]>;
> +                       IIC_SSE_PSHUF_RI>, VEX, VEX_L,
> Sched<[WriteShuffle]>;
>    def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
>                         (ins i256mem:$src1, i8imm:$src2),
>                         !strconcat("v", OpcodeStr,
>                                    "\t{$src2, $src1, $dst|$dst,
>                                    $src1, $src2}"),
>                        [(set VR256:$dst,
>                          (vt256 (OpNode (bitconvert (memopv4i64
>                          addr:$src1)),
> -                         (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
> VEX_L,
> +                         (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
> VEX_L,
>                     Sched<[WriteShuffleLd]>;
>  }
>  
> @@ -4099,14 +4130,14 @@ let Predicates = [UseSSE2] in {
>                            "\t{$src2, $src1, $dst|$dst, $src1,
>                            $src2}"),
>                  [(set VR128:$dst,
>                    (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
> -                IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
> +                IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
>    def mi : Ii8<0x70, MRMSrcMem,
>                 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
>                 !strconcat(OpcodeStr,
>                            "\t{$src2, $src1, $dst|$dst, $src1,
>                            $src2}"),
>                  [(set VR128:$dst,
>                    (vt128 (OpNode (bitconvert (memopv2i64
>                    addr:$src1)),
> -                          (i8 imm:$src2))))], IIC_SSE_PSHUF>,
> +                          (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>,
>             Sched<[WriteShuffleLd]>;
>  }
>  }
> @@ -5382,7 +5413,7 @@ multiclass ssse3_palignr<string asm, bit
>          !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2,
>          $src3}"),
>          !strconcat(asm,
>                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2,
>                    $src3}")),
> -      [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
> +      [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>;
>    let mayLoad = 1 in
>    def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
>        (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
> @@ -5390,7 +5421,7 @@ multiclass ssse3_palignr<string asm, bit
>          !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2,
>          $src3}"),
>          !strconcat(asm,
>                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2,
>                    $src3}")),
> -      [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd,
> ReadAfterLd]>;
> +      [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd,
> ReadAfterLd]>;
>    }
>  }
>  
> @@ -5482,16 +5513,17 @@ def : InstAlias<"monitor\t{%rax, %rcx, %
>  // SSE4.1 - Packed Move with Sign/Zero Extend
>  //===----------------------------------------------------------------------===//
>  
> -multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr,
> Intrinsic IntId> {
> +multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr,
> Intrinsic IntId,
> +                               OpndItins itins = DEFAULT_ITINS> {
>    def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins
>    VR128:$src),
>                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
> -                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
> +                 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,
> OpSize;
>  
>    def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins
>    i64mem:$src),
>                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>         [(set VR128:$dst,
> -         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64
> addr:$src))))))]>,
> -       OpSize;
> +         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64
> addr:$src))))))],
> +         itins.rm>, OpSize;
>  }
>  
>  multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
> @@ -5502,22 +5534,23 @@ multiclass SS41I_binop_rm_int16_y<bits<8
>  
>    def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins
>    i128mem:$src),
>                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst,
>                    $src}"),
> -                  [(set VR256:$dst, (IntId (load addr:$src)))]>,
> OpSize;
> +                  [(set VR256:$dst, (IntId (load addr:$src)))]>,
> +                  OpSize;
>  }
>  
>  let Predicates = [HasAVX] in {
> -defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",
> int_x86_sse41_pmovsxbw>,
> -                                     VEX;
> -defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",
> int_x86_sse41_pmovsxwd>,
> -                                     VEX;
> -defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",
> int_x86_sse41_pmovsxdq>,
> -                                     VEX;
> -defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",
> int_x86_sse41_pmovzxbw>,
> -                                     VEX;
> -defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",
> int_x86_sse41_pmovzxwd>,
> -                                     VEX;
> -defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",
> int_x86_sse41_pmovzxdq>,
> -                                     VEX;
> +defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",
> +                                     int_x86_sse41_pmovsxbw>, VEX;
> +defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",
> +                                     int_x86_sse41_pmovsxwd>, VEX;
> +defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",
> +                                     int_x86_sse41_pmovsxdq>, VEX;
> +defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",
> +                                     int_x86_sse41_pmovzxbw>, VEX;
> +defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",
> +                                     int_x86_sse41_pmovzxwd>, VEX;
> +defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",
> +                                     int_x86_sse41_pmovzxdq>, VEX;
>  }
>  
>  let Predicates = [HasAVX2] in {
> @@ -5535,12 +5568,12 @@ defm VPMOVZXDQ : SS41I_binop_rm_int16_y<
>                                          int_x86_avx2_pmovzxdq>, VEX,
>                                          VEX_L;
>  }
>  
> -defm PMOVSXBW   : SS41I_binop_rm_int8<0x20, "pmovsxbw",
> int_x86_sse41_pmovsxbw>;
> -defm PMOVSXWD   : SS41I_binop_rm_int8<0x23, "pmovsxwd",
> int_x86_sse41_pmovsxwd>;
> -defm PMOVSXDQ   : SS41I_binop_rm_int8<0x25, "pmovsxdq",
> int_x86_sse41_pmovsxdq>;
> -defm PMOVZXBW   : SS41I_binop_rm_int8<0x30, "pmovzxbw",
> int_x86_sse41_pmovzxbw>;
> -defm PMOVZXWD   : SS41I_binop_rm_int8<0x33, "pmovzxwd",
> int_x86_sse41_pmovzxwd>;
> -defm PMOVZXDQ   : SS41I_binop_rm_int8<0x35, "pmovzxdq",
> int_x86_sse41_pmovzxdq>;
> +defm PMOVSXBW   : SS41I_binop_rm_int8<0x20, "pmovsxbw",
> int_x86_sse41_pmovsxbw,
>                                       SSE_INTALU_ITINS_P>;
> +defm PMOVSXWD   : SS41I_binop_rm_int8<0x23, "pmovsxwd",
> int_x86_sse41_pmovsxwd,
>                                       SSE_INTALU_ITINS_P>;
> +defm PMOVSXDQ   : SS41I_binop_rm_int8<0x25, "pmovsxdq",
> int_x86_sse41_pmovsxdq,
>                                       SSE_INTALU_ITINS_P>;
> +defm PMOVZXBW   : SS41I_binop_rm_int8<0x30, "pmovzxbw",
> int_x86_sse41_pmovzxbw,
>                                       SSE_INTALU_ITINS_P>;
> +defm PMOVZXWD   : SS41I_binop_rm_int8<0x33, "pmovzxwd",
> int_x86_sse41_pmovzxwd,
>                                       SSE_INTALU_ITINS_P>;
> +defm PMOVZXDQ   : SS41I_binop_rm_int8<0x35, "pmovzxdq",
> int_x86_sse41_pmovzxdq,
>                                       SSE_INTALU_ITINS_P>;
>  
>  let Predicates = [HasAVX] in {
>    // Common patterns involving scalar load.
> @@ -5655,15 +5688,17 @@ let Predicates = [UseSSE41] in {
>  }
>  
>  
> -multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr,
> Intrinsic IntId> {
> +multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr,
> Intrinsic IntId,
> +                               OpndItins itins = DEFAULT_ITINS> {
>    def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins
>    VR128:$src),
>                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
> -                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
> +                 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,
> OpSize;
>  
>    def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins
>    i32mem:$src),
>                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>         [(set VR128:$dst,
> -         (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32
> addr:$src))))))]>,
> +         (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32
> addr:$src))))))],
> +         itins.rm>,
>            OpSize;
>  }
>  
> @@ -5702,10 +5737,14 @@ defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0
>                                         int_x86_avx2_pmovzxwq>, VEX,
>                                         VEX_L;
>  }
>  
> -defm PMOVSXBD   : SS41I_binop_rm_int4<0x21, "pmovsxbd",
> int_x86_sse41_pmovsxbd>;
> -defm PMOVSXWQ   : SS41I_binop_rm_int4<0x24, "pmovsxwq",
> int_x86_sse41_pmovsxwq>;
> -defm PMOVZXBD   : SS41I_binop_rm_int4<0x31, "pmovzxbd",
> int_x86_sse41_pmovzxbd>;
> -defm PMOVZXWQ   : SS41I_binop_rm_int4<0x34, "pmovzxwq",
> int_x86_sse41_pmovzxwq>;
> +defm PMOVSXBD   : SS41I_binop_rm_int4<0x21, "pmovsxbd",
> int_x86_sse41_pmovsxbd,
> +                                      SSE_INTALU_ITINS_P>;
> +defm PMOVSXWQ   : SS41I_binop_rm_int4<0x24, "pmovsxwq",
> int_x86_sse41_pmovsxwq,
> +                                      SSE_INTALU_ITINS_P>;
> +defm PMOVZXBD   : SS41I_binop_rm_int4<0x31, "pmovzxbd",
> int_x86_sse41_pmovzxbd,
> +                                      SSE_INTALU_ITINS_P>;
> +defm PMOVZXWQ   : SS41I_binop_rm_int4<0x34, "pmovzxwq",
> int_x86_sse41_pmovzxwq,
> +                                      SSE_INTALU_ITINS_P>;
>  
>  let Predicates = [HasAVX] in {
>    // Common patterns involving scalar load
> @@ -5733,7 +5772,8 @@ let Predicates = [UseSSE41] in {
>              (PMOVZXWQrm addr:$src)>;
>  }
>  
> -multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr,
> Intrinsic IntId> {
> +multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr,
> Intrinsic IntId,
> +                               OpndItins itins = DEFAULT_ITINS> {
>    def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins
>    VR128:$src),
>                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>                   [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
> @@ -5772,8 +5812,10 @@ defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0
>  defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
>                                         int_x86_avx2_pmovzxbq>, VEX,
>                                         VEX_L;
>  }
> -defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq",
> int_x86_sse41_pmovsxbq>;
> -defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq",
> int_x86_sse41_pmovzxbq>;
> +defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq",
> int_x86_sse41_pmovsxbq,
> +                                      SSE_INTALU_ITINS_P>;
> +defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq",
> int_x86_sse41_pmovzxbq,
> +                                      SSE_INTALU_ITINS_P>;
>  
>  let Predicates = [HasAVX2] in {
>    def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr
>    VR128:$src)>;
> @@ -6115,20 +6157,22 @@ defm PEXTRQ      : SS41I_extract64<0x16,
>  
>  /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
>  /// destination
> -multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
> +multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
> +                            OpndItins itins = DEFAULT_ITINS> {
>    def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
>                   (ins VR128:$src1, i32i8imm:$src2),
>                   !strconcat(OpcodeStr,
>                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                   [(set GR32:$dst,
> -                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
> imm:$src2))]>,
> +                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
> imm:$src2))],
> +                    itins.rr>,
>             OpSize;
>    def mr : SS4AIi8<opc, MRMDestMem, (outs),
>                   (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
>                   !strconcat(OpcodeStr,
>                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
>                   imm:$src2),
> -                          addr:$dst)]>, OpSize;
> +                          addr:$dst)], itins.rm>, OpSize;
>  }
>  
>  let ExeDomain = SSEPackedSingle in {
> @@ -6139,7 +6183,7 @@ let ExeDomain = SSEPackedSingle in {
>                      "vextractps\t{$src2, $src1, $dst|$dst, $src1,
>                      $src2}",
>                      []>, OpSize, VEX;
>    }
> -  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
> +  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps",
> SSE_EXTRACT_ITINS>;
>  }
>  
>  // Also match an EXTRACTPS store when the store is done as f32 instead of i32.
> @@ -6239,7 +6283,8 @@ let Constraints = "$src1 = $dst" in
>  // are optimized inserts that won't zero arbitrary elements in the destination
>  // vector. The next one matches the intrinsic and could zero arbitrary elements
>  // in the target vector.
> -multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1>
> {
> +multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
> +                           OpndItins itins = DEFAULT_ITINS> {
>    def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
>        (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
>        !if(Is2Addr,
> @@ -6247,7 +6292,7 @@ multiclass SS41I_insertf32<bits<8> opc,
>          !strconcat(asm,
>                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2,
>                     $src3}")),
>        [(set VR128:$dst,
> -        (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
> +        (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))],
> itins.rr>,
>        OpSize;
>    def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
>        (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
> @@ -6258,14 +6303,14 @@ multiclass SS41I_insertf32<bits<8> opc,
>        [(set VR128:$dst,
>          (X86insrtps VR128:$src1,
>                     (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
> -                    imm:$src3))]>, OpSize;
> +                    imm:$src3))], itins.rm>, OpSize;
>  }
>  
>  let ExeDomain = SSEPackedSingle in {
>    let Predicates = [HasAVX] in
>      defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
>    let Constraints = "$src1 = $dst" in
> -    defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
> +    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1,
> SSE_INSERT_ITINS>;
>  }
>  
>  //===----------------------------------------------------------------------===//
> @@ -6283,7 +6328,8 @@ let ExeDomain = SSEPackedSingle in {
>                      (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
>                      !strconcat(OpcodeStr,
>                      "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> -                    [(set RC:$dst, (V4F32Int RC:$src1,
> imm:$src2))]>,
> +                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
> +                    IIC_SSE_ROUNDPS_REG>,
>                      OpSize;
>  
>    // Vector intrinsic operation, mem
> @@ -6292,7 +6338,8 @@ let ExeDomain = SSEPackedSingle in {
>                      !strconcat(OpcodeStr,
>                      "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                      [(set RC:$dst,
> -                          (V4F32Int (mem_frag32
> addr:$src1),imm:$src2))]>,
> +                          (V4F32Int (mem_frag32
> addr:$src1),imm:$src2))],
> +                          IIC_SSE_ROUNDPS_MEM>,
>                      OpSize;
>  } // ExeDomain = SSEPackedSingle
>  
> @@ -6302,7 +6349,8 @@ let ExeDomain = SSEPackedDouble in {
>                      (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
>                      !strconcat(OpcodeStr,
>                      "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> -                    [(set RC:$dst, (V2F64Int RC:$src1,
> imm:$src2))]>,
> +                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
> +                    IIC_SSE_ROUNDPS_REG>,
>                      OpSize;
>  
>    // Vector intrinsic operation, mem
> @@ -6311,7 +6359,8 @@ let ExeDomain = SSEPackedDouble in {
>                      !strconcat(OpcodeStr,
>                      "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                      [(set RC:$dst,
> -                          (V2F64Int (mem_frag64
> addr:$src1),imm:$src2))]>,
> +                          (V2F64Int (mem_frag64
> addr:$src1),imm:$src2))],
> +                          IIC_SSE_ROUNDPS_REG>,
>                      OpSize;
>  } // ExeDomain = SSEPackedDouble
>  }
> @@ -6593,30 +6642,33 @@ defm VTESTPDY : avx_bittest<0x0F, "vtest
>  let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
>    def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins
>    GR16:$src),
>                       "popcnt{w}\t{$src, $dst|$dst, $src}",
> -                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit
> EFLAGS)]>,
> +                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit
> EFLAGS)],
> +                     IIC_SSE_POPCNT_RR>,
>                       OpSize, XS;
>    def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins
>    i16mem:$src),
>                       "popcnt{w}\t{$src, $dst|$dst, $src}",
>                       [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
> -                      (implicit EFLAGS)]>, OpSize, XS;
> +                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
> OpSize, XS;
>  
>    def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins
>    GR32:$src),
>                       "popcnt{l}\t{$src, $dst|$dst, $src}",
> -                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit
> EFLAGS)]>,
> +                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit
> EFLAGS)],
> +                     IIC_SSE_POPCNT_RR>,
>                       XS;
>    def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins
>    i32mem:$src),
>                       "popcnt{l}\t{$src, $dst|$dst, $src}",
>                       [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
> -                      (implicit EFLAGS)]>, XS;
> +                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
>  
>    def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins
>    GR64:$src),
>                        "popcnt{q}\t{$src, $dst|$dst, $src}",
> -                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit
> EFLAGS)]>,
> +                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit
> EFLAGS)],
> +                      IIC_SSE_POPCNT_RR>,
>                        XS;
>    def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins
>    i64mem:$src),
>                        "popcnt{q}\t{$src, $dst|$dst, $src}",
>                        [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
> -                       (implicit EFLAGS)]>, XS;
> +                       (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
>  }
>  
>  
> @@ -6644,14 +6696,16 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16
>  
>  /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
>  multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
> -                              Intrinsic IntId128, bit Is2Addr = 1> {
> +                              Intrinsic IntId128, bit Is2Addr = 1,
> +                              OpndItins itins = DEFAULT_ITINS> {
>    let isCommutable = 1 in
>    def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
>         (ins VR128:$src1, VR128:$src2),
>         !if(Is2Addr,
>             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
>             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1,
>             $src2}")),
> -       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
> OpSize;
> +       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
> +        itins.rr>, OpSize;
>    def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
>         (ins VR128:$src1, i128mem:$src2),
>         !if(Is2Addr,
> @@ -6659,7 +6713,8 @@ multiclass SS41I_binop_rm_int<bits<8> op
>             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1,
>             $src2}")),
>         [(set VR128:$dst,
>           (IntId128 VR128:$src1,
> -          (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
> +          (bitconvert (memopv2i64 addr:$src2))))],
> +          itins.rm>, OpSize;
>  }
>  
>  /// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
> @@ -6682,7 +6737,8 @@ multiclass SS41I_binop_rm_int_y<bits<8>
>  /// SS48I_binop_rm - Simple SSE41 binary operator.
>  multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode
>  OpNode,
>                            ValueType OpVT, RegisterClass RC, PatFrag
>                            memop_frag,
> -                          X86MemOperand x86memop, bit Is2Addr = 1> {
> +                          X86MemOperand x86memop, bit Is2Addr = 1,
> +                          OpndItins itins = DEFAULT_ITINS> {
>    let isCommutable = 1 in
>    def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
>         (ins RC:$src1, RC:$src2),
> @@ -6752,22 +6808,23 @@ let Constraints = "$src1 = $dst" in {
>    let isCommutable = 0 in
>    defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
>    int_x86_sse41_packusdw>;
>    defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8,
>    VR128,
> -                                 memopv2i64, i128mem>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
>    defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32,
>    VR128,
> -                                 memopv2i64, i128mem>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
>    defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32,
>    VR128,
> -                                 memopv2i64, i128mem>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
>    defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16,
>    VR128,
> -                                 memopv2i64, i128mem>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
>    defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8,
>    VR128,
> -                                 memopv2i64, i128mem>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
>    defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32,
>    VR128,
> -                                 memopv2i64, i128mem>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
>    defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32,
>    VR128,
> -                                 memopv2i64, i128mem>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
>    defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16,
>    VR128,
> -                                 memopv2i64, i128mem>;
> -  defm PMULDQ   : SS41I_binop_rm_int<0x28, "pmuldq",
>   int_x86_sse41_pmuldq>;
> +                                 memopv2i64, i128mem, 1,
> SSE_INTALU_ITINS_P>;
> +  defm PMULDQ   : SS41I_binop_rm_int<0x28, "pmuldq",
>   int_x86_sse41_pmuldq,
> +                                     1, SSE_INTMUL_ITINS_P>;
>  }
>  
>  let Predicates = [HasAVX] in {
> @@ -6785,15 +6842,16 @@ let Predicates = [HasAVX2] in {
>  
>  let Constraints = "$src1 = $dst" in {
>    defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
> -                                memopv2i64, i128mem>;
> +                                memopv2i64, i128mem, 1,
> SSE_PMULLD_ITINS>;
>    defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64,
>    VR128,
> -                                memopv2i64, i128mem>;
> +                                memopv2i64, i128mem, 1,
> SSE_INTALUQ_ITINS_P>;
>  }
>  
>  /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
>  multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
>                   Intrinsic IntId, RegisterClass RC, PatFrag
>                   memop_frag,
> -                 X86MemOperand x86memop, bit Is2Addr = 1> {
> +                 X86MemOperand x86memop, bit Is2Addr = 1,
> +                 OpndItins itins = DEFAULT_ITINS> {
>    let isCommutable = 1 in
>    def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
>          (ins RC:$src1, RC:$src2, u32u8imm:$src3),
> @@ -6802,7 +6860,7 @@ multiclass SS41I_binop_rmi_int<bits<8> o
>                  "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
>              !strconcat(OpcodeStr,
>                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2,
>                  $src3}")),
> -        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
> +        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))],
> itins.rr>,
>          OpSize;
>    def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
>          (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
> @@ -6813,7 +6871,7 @@ multiclass SS41I_binop_rmi_int<bits<8> o
>                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2,
>                  $src3}")),
>          [(set RC:$dst,
>            (IntId RC:$src1,
> -           (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
> +           (bitconvert (memop_frag addr:$src2)), imm:$src3))],
> itins.rm>,
>          OpSize;
>  }
>  
> @@ -6862,21 +6920,27 @@ let Constraints = "$src1 = $dst" in {
>    let isCommutable = 0 in {
>    let ExeDomain = SSEPackedSingle in
>    defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
>    int_x86_sse41_blendps,
> -                                     VR128, memopv4f32, f128mem>;
> +                                     VR128, memopv4f32, f128mem,
> +                                     1, SSE_INTALU_ITINS_P>;
>    let ExeDomain = SSEPackedDouble in
>    defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
>    int_x86_sse41_blendpd,
> -                                     VR128, memopv2f64, f128mem>;
> +                                     VR128, memopv2f64, f128mem,
> +                                     1, SSE_INTALU_ITINS_P>;
>    defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
>    int_x86_sse41_pblendw,
> -                                     VR128, memopv2i64, i128mem>;
> +                                     VR128, memopv2i64, i128mem,
> +                                     1, SSE_INTALU_ITINS_P>;
>    defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
>    int_x86_sse41_mpsadbw,
> -                                     VR128, memopv2i64, i128mem>;
> +                                     VR128, memopv2i64, i128mem,
> +                                     1, SSE_INTMUL_ITINS_P>;
>    }
>    let ExeDomain = SSEPackedSingle in
>    defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
> -                                  VR128, memopv4f32, f128mem>;
> +                                  VR128, memopv4f32, f128mem, 1,
> +                                  SSE_DPPS_ITINS>;
>    let ExeDomain = SSEPackedDouble in
>    defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
> -                                  VR128, memopv2f64, f128mem>;
> +                                  VR128, memopv2f64, f128mem, 1,
> +                                  SSE_DPPD_ITINS>;
>  }
>  
>  /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
> @@ -6981,13 +7045,14 @@ let Predicates = [HasAVX2] in {
>  /// SS41I_ternary_int - SSE 4.1 ternary operator
>  let Uses = [XMM0], Constraints = "$src1 = $dst" in {
>    multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr,
>    PatFrag mem_frag,
> -                               X86MemOperand x86memop, Intrinsic
> IntId> {
> +                               X86MemOperand x86memop, Intrinsic
> IntId,
> +                               OpndItins itins = DEFAULT_ITINS> {
>      def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
>                      (ins VR128:$src1, VR128:$src2),
>                      !strconcat(OpcodeStr,
>                       "\t{$src2, $dst|$dst, $src2}"),
> -                    [(set VR128:$dst, (IntId VR128:$src1,
> VR128:$src2, XMM0))]>,
> -                    OpSize;
> +                    [(set VR128:$dst, (IntId VR128:$src1,
> VR128:$src2, XMM0))],
> +                    itins.rr>, OpSize;
>  
>      def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
>                      (ins VR128:$src1, x86memop:$src2),
> @@ -6995,7 +7060,8 @@ let Uses = [XMM0], Constraints = "$src1
>                       "\t{$src2, $dst|$dst, $src2}"),
>                      [(set VR128:$dst,
>                        (IntId VR128:$src1,
> -                       (bitconvert (mem_frag addr:$src2)), XMM0))]>,
> OpSize;
> +                       (bitconvert (mem_frag addr:$src2)), XMM0))],
> +                       itins.rm>, OpSize;
>    }
>  }
>  
> @@ -7262,61 +7328,66 @@ let Constraints = "$src1 = $dst" in {
>                        "crc32{b}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR32:$dst,
>                           (int_x86_sse42_crc32_32_8 GR32:$src1,
> -                         (load addr:$src2)))]>;
> +                         (load addr:$src2)))], IIC_CRC32_MEM>;
>    def CRC32r32r8  : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
>                        (ins GR32:$src1, GR8:$src2),
>                        "crc32{b}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR32:$dst,
> -                         (int_x86_sse42_crc32_32_8 GR32:$src1,
> GR8:$src2))]>;
> +                         (int_x86_sse42_crc32_32_8 GR32:$src1,
> GR8:$src2))],
> +                         IIC_CRC32_REG>;
>    def CRC32r32m16  : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
>                        (ins GR32:$src1, i16mem:$src2),
>                        "crc32{w}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR32:$dst,
>                           (int_x86_sse42_crc32_32_16 GR32:$src1,
> -                         (load addr:$src2)))]>,
> +                         (load addr:$src2)))], IIC_CRC32_MEM>,
>                           OpSize;
>    def CRC32r32r16  : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
>                        (ins GR32:$src1, GR16:$src2),
>                        "crc32{w}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR32:$dst,
> -                         (int_x86_sse42_crc32_32_16 GR32:$src1,
> GR16:$src2))]>,
> +                         (int_x86_sse42_crc32_32_16 GR32:$src1,
> GR16:$src2))],
> +                         IIC_CRC32_REG>,
>                           OpSize;
>    def CRC32r32m32  : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
>                        (ins GR32:$src1, i32mem:$src2),
>                        "crc32{l}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR32:$dst,
>                           (int_x86_sse42_crc32_32_32 GR32:$src1,
> -                         (load addr:$src2)))]>;
> +                         (load addr:$src2)))], IIC_CRC32_MEM>;
>    def CRC32r32r32  : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
>                        (ins GR32:$src1, GR32:$src2),
>                        "crc32{l}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR32:$dst,
> -                         (int_x86_sse42_crc32_32_32 GR32:$src1,
> GR32:$src2))]>;
> +                         (int_x86_sse42_crc32_32_32 GR32:$src1,
> GR32:$src2))],
> +                         IIC_CRC32_REG>;
>    def CRC32r64m8  : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
>                        (ins GR64:$src1, i8mem:$src2),
>                        "crc32{b}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR64:$dst,
>                           (int_x86_sse42_crc32_64_8 GR64:$src1,
> -                         (load addr:$src2)))]>,
> +                         (load addr:$src2)))], IIC_CRC32_MEM>,
>                           REX_W;
>    def CRC32r64r8  : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
>                        (ins GR64:$src1, GR8:$src2),
>                        "crc32{b}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR64:$dst,
> -                         (int_x86_sse42_crc32_64_8 GR64:$src1,
> GR8:$src2))]>,
> +                         (int_x86_sse42_crc32_64_8 GR64:$src1,
> GR8:$src2))],
> +                         IIC_CRC32_REG>,
>                           REX_W;
>    def CRC32r64m64  : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
>                        (ins GR64:$src1, i64mem:$src2),
>                        "crc32{q}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR64:$dst,
>                           (int_x86_sse42_crc32_64_64 GR64:$src1,
> -                         (load addr:$src2)))]>,
> +                         (load addr:$src2)))], IIC_CRC32_MEM>,
>                           REX_W;
>    def CRC32r64r64  : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
>                        (ins GR64:$src1, GR64:$src2),
>                        "crc32{q}\t{$src2, $src1|$src1, $src2}",
>                         [(set GR64:$dst,
> -                         (int_x86_sse42_crc32_64_64 GR64:$src1,
> GR64:$src2))]>,
> +                         (int_x86_sse42_crc32_64_64 GR64:$src1,
> GR64:$src2))],
> +                         IIC_CRC32_REG>,
>                           REX_W;
>  }
>  
> @@ -7458,13 +7529,15 @@ def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrc
>             (ins VR128:$src1, VR128:$src2, i8imm:$src3),
>             "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
>             [(set VR128:$dst,
> -             (int_x86_pclmulqdq VR128:$src1, VR128:$src2,
> imm:$src3))]>;
> +             (int_x86_pclmulqdq VR128:$src1, VR128:$src2,
> imm:$src3))],
> +             IIC_SSE_PCLMULQDQ_RR>;
>  
>  def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
>             (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
>             "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
>             [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
> -                              (memopv2i64 addr:$src2),
> imm:$src3))]>;
> +                              (memopv2i64 addr:$src2), imm:$src3))],
> +                              IIC_SSE_PCLMULQDQ_RM>;
>  } // Constraints = "$src1 = $dst"
>  
>  
> 
> Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
> +++ llvm/trunk/lib/Target/X86/X86Schedule.td Fri Sep 13 14:23:28 2013
> @@ -141,9 +141,12 @@ def IIC_IDIV64      : InstrItinClass;
>  // neg/not/inc/dec
>  def IIC_UNARY_REG   : InstrItinClass;
>  def IIC_UNARY_MEM   : InstrItinClass;
> -// add/sub/and/or/xor/adc/sbc/cmp/test
> +// add/sub/and/or/xor/cmp/test
>  def IIC_BIN_MEM     : InstrItinClass;
>  def IIC_BIN_NONMEM  : InstrItinClass;
> +// adc/sbb
> +def IIC_BIN_CARRY_MEM     : InstrItinClass;
> +def IIC_BIN_CARRY_NONMEM  : InstrItinClass;
>  // shift/rotate
>  def IIC_SR          : InstrItinClass;
>  // shift double
> @@ -250,11 +253,11 @@ def IIC_SSE_INTSH_P_RR : InstrItinClass;
>  def IIC_SSE_INTSH_P_RM : InstrItinClass;
>  def IIC_SSE_INTSH_P_RI : InstrItinClass;
>  
> -def IIC_SSE_CMPP_RR : InstrItinClass;
> -def IIC_SSE_CMPP_RM : InstrItinClass;
> +def IIC_SSE_INTSHDQ_P_RI : InstrItinClass;
>  
>  def IIC_SSE_SHUFP : InstrItinClass;
> -def IIC_SSE_PSHUF : InstrItinClass;
> +def IIC_SSE_PSHUF_RI : InstrItinClass;
> +def IIC_SSE_PSHUF_MI : InstrItinClass;
>  
>  def IIC_SSE_UNPCK : InstrItinClass;
>  
> @@ -316,7 +319,8 @@ def IIC_SSE_PSIGN_RM : InstrItinClass;
>  
>  def IIC_SSE_PMADD : InstrItinClass;
>  def IIC_SSE_PMULHRSW : InstrItinClass;
> -def IIC_SSE_PALIGNR : InstrItinClass;
> +def IIC_SSE_PALIGNRR : InstrItinClass;
> +def IIC_SSE_PALIGNRM : InstrItinClass;
>  def IIC_SSE_MWAIT : InstrItinClass;
>  def IIC_SSE_MONITOR : InstrItinClass;
>  
> @@ -492,8 +496,8 @@ def IIC_PUSH_REG : InstrItinClass;
>  def IIC_PUSH_F : InstrItinClass;
>  def IIC_PUSH_A : InstrItinClass;
>  def IIC_BSWAP : InstrItinClass;
> -def IIC_BSF : InstrItinClass;
> -def IIC_BSR : InstrItinClass;
> +def IIC_BIT_SCAN_MEM : InstrItinClass;
> +def IIC_BIT_SCAN_REG : InstrItinClass;
>  def IIC_MOVS : InstrItinClass;
>  def IIC_STOS : InstrItinClass;
>  def IIC_SCAS : InstrItinClass;
> @@ -540,6 +544,33 @@ def IIC_BOUND : InstrItinClass;
>  def IIC_ARPL_REG : InstrItinClass;
>  def IIC_ARPL_MEM : InstrItinClass;
>  def IIC_MOVBE : InstrItinClass;
> +def IIC_AES   : InstrItinClass;
> +def IIC_BLEND_MEM : InstrItinClass;
> +def IIC_BLEND_NOMEM : InstrItinClass;
> +def IIC_CBW   : InstrItinClass;
> +def IIC_CRC32_REG : InstrItinClass;
> +def IIC_CRC32_MEM : InstrItinClass;
> +def IIC_SSE_DPPD_RR : InstrItinClass;
> +def IIC_SSE_DPPD_RM : InstrItinClass;
> +def IIC_SSE_DPPS_RR : InstrItinClass;
> +def IIC_SSE_DPPS_RM : InstrItinClass;
> +def IIC_MMX_EMMS : InstrItinClass;
> +def IIC_SSE_EXTRACTPS_RR : InstrItinClass;
> +def IIC_SSE_EXTRACTPS_RM : InstrItinClass;
> +def IIC_SSE_INSERTPS_RR : InstrItinClass;
> +def IIC_SSE_INSERTPS_RM : InstrItinClass;
> +def IIC_SSE_MPSADBW_RR : InstrItinClass;
> +def IIC_SSE_MPSADBW_RM : InstrItinClass;
> +def IIC_SSE_PMULLD_RR : InstrItinClass;
> +def IIC_SSE_PMULLD_RM : InstrItinClass;
> +def IIC_SSE_ROUNDPS_REG : InstrItinClass;
> +def IIC_SSE_ROUNDPS_MEM : InstrItinClass;
> +def IIC_SSE_ROUNDPD_REG : InstrItinClass;
> +def IIC_SSE_ROUNDPD_MEM : InstrItinClass;
> +def IIC_SSE_POPCNT_RR : InstrItinClass;
> +def IIC_SSE_POPCNT_RM : InstrItinClass;
> +def IIC_SSE_PCLMULQDQ_RR : InstrItinClass;
> +def IIC_SSE_PCLMULQDQ_RM : InstrItinClass;
>  
>  def IIC_NOP : InstrItinClass;
>  
> @@ -561,7 +592,7 @@ def IIC_NOP : InstrItinClass;
>  // latencies. Since these latencies are not used for pipeline
>  hazards,
>  // they do not need to be exact.
>  //
> -// The GenericModel contains no instruciton itineraries.
> +// The GenericModel contains no instruction itineraries.
>  def GenericModel : SchedMachineModel {
>    let IssueWidth = 4;
>    let MicroOpBufferSize = 32;
> @@ -572,3 +603,4 @@ def GenericModel : SchedMachineModel {
>  include "X86ScheduleAtom.td"
>  include "X86SchedSandyBridge.td"
>  include "X86SchedHaswell.td"
> +include "X86ScheduleSLM.td"
> 
> Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
> +++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Fri Sep 13 14:23:28
> 2013
> @@ -7,8 +7,8 @@
>  //
>  //===----------------------------------------------------------------------===//
>  //
> -// This file defines the itinerary class data for the Intel Atom
> (Bonnell)
> -// processors.
> +// This file defines the itinerary class data for the Intel Atom
> +// in-order (Saltwell-32nm/Bonnell-45nm) processors.
>  //
>  //===----------------------------------------------------------------------===//
>  
> @@ -79,9 +79,12 @@ def AtomItineraries : ProcessorItinerari
>    // neg/not/inc/dec
>    InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
>    InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
> -  // add/sub/and/or/xor/adc/sbc/cmp/test
> +  // add/sub/and/or/xor/cmp/test
>    InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
>    InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
> +  // adc/sbb
> +  InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<1, [Port0,
> Port1]>] >,
> +  InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<1, [Port0]>] >,
>    // shift/rotate
>    InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
>    // shift double
> @@ -203,11 +206,11 @@ def AtomItineraries : ProcessorItinerari
>    InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>]
>    >,
>    InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>]
>    >,
>  
> -  InstrItinData<IIC_SSE_CMPP_RR, [InstrStage<6, [Port0, Port1]>] >,
> -  InstrItinData<IIC_SSE_CMPP_RM, [InstrStage<7, [Port0, Port1]>] >,
> +  InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [Port0,
> Port1]>] >,
>  
>    InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
> -  InstrItinData<IIC_SSE_PSHUF, [InstrStage<1, [Port0]>] >,
> +  InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
> +  InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
>  
>    InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
>  
> @@ -278,7 +281,8 @@ def AtomItineraries : ProcessorItinerari
>  
>    InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
>    InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
> -  InstrItinData<IIC_SSE_PALIGNR, [InstrStage<1, [Port0]>] >,
> +  InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [Port0]>] >,
> +  InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [Port0]>] >,
>    InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
>    InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
>  
> @@ -470,8 +474,8 @@ def AtomItineraries : ProcessorItinerari
>    InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
>  
>    InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
> -  InstrItinData<IIC_BSF, [InstrStage<16, [Port0, Port1]>] >,
> -  InstrItinData<IIC_BSR, [InstrStage<16, [Port0, Port1]>] >,
> +  InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<16, [Port0, Port1]>]
> >,
> +  InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<16, [Port0, Port1]>]
> >,
>    InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
>    InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
>    InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
> @@ -518,6 +522,8 @@ def AtomItineraries : ProcessorItinerari
>    InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
>    InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
>    InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
> +  InstrItinData<IIC_CBW, [InstrStage<4, [Port0, Port1]>] >,
> +  InstrItinData<IIC_MMX_EMMS, [InstrStage<5, [Port0, Port1]>] >,
>  
>    InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
>    ]>;
> 
> Added: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=190717&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (added)
> +++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Fri Sep 13 14:23:28
> 2013
> @@ -0,0 +1,668 @@
> +//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*-
> tablegen -*-==//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open
> Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines the itinerary class data for the Intel Atom
> +// (Silvermont) processor.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +def IEC_RSV0 : FuncUnit;
> +def IEC_RSV1 : FuncUnit;
> +def FPC_RSV0 : FuncUnit;
> +def FPC_RSV1 : FuncUnit;
> +def MEC_RSV : FuncUnit;
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +def SLMItineraries : ProcessorItineraries<
> +  [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ],
> +  [], [
> +  // [InstrStage<N, [FPC_RSV0, FPC_RSV1]>]
> +  // [InstrStage<N, [FPC_RSV0, FPC_RSV1], 0>, InstrStage<N,
> [MEC_RSV]>]
> +  // [InstrStage<N, [IEC_RSV0, IEC_RSV1]>]
> +  // [InstrStage<N, [IEC_RSV0, IEC_RSV1],
> 0>,InstrStage<N,[MEC_RSV]>]
> +  //
> +  // Default is 1 cycle, IEC_RSV0 or IEC_RSV1
> +  //InstrItinData<IIC_DEFAULT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_ALU_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_LEA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LEA_16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  // mul
> +  InstrItinData<IIC_MUL8, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_MUL16_MEM, [InstrStage<4, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_MUL16_REG, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_MUL32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<3, [MEC_RSV]>] >,
> +  InstrItinData<IIC_MUL32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_MUL64, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
> +  // imul by al, ax, eax, rax
> +  InstrItinData<IIC_IMUL8, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_IMUL16_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<6, [MEC_RSV]>] >,
> +  InstrItinData<IIC_IMUL16_REG, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_IMUL32_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<6, [MEC_RSV]>] >,
> +  InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_IMUL64, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
> +  // imul reg by reg|mem
> +  InstrItinData<IIC_IMUL16_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_IMUL16_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_IMUL32_RM, [InstrStage<3, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<3, [MEC_RSV]>] >,
> +  InstrItinData<IIC_IMUL32_RR, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_IMUL64_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_IMUL64_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>]
>  >,
> +  // imul reg = reg/mem * imm
> +  InstrItinData<IIC_IMUL16_RRI, [InstrStage<4, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_IMUL32_RRI, [InstrStage<3, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_IMUL64_RRI, [InstrStage<4, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_IMUL16_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_IMUL32_RMI, [InstrStage<3, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<3, [MEC_RSV]>] >,
> +  InstrItinData<IIC_IMUL64_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  // idiv - min latency
> +  InstrItinData<IIC_IDIV8, [InstrStage<34, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_IDIV16, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_IDIV32, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_IDIV64, [InstrStage<49, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // div - min latency
> +  InstrItinData<IIC_DIV8_REG, [InstrStage<25, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_DIV8_MEM, [InstrStage<25, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<25, [MEC_RSV]>] >,
> +  InstrItinData<IIC_DIV16, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_DIV32, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_DIV64, [InstrStage<38, [IEC_RSV0, IEC_RSV1]>] >,
> +  // neg/not/inc/dec
> +  InstrItinData<IIC_UNARY_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  // add/sub/and/or/xor/cmp/test
> +  InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_BIN_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  // adc/sbb
> +  InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<2, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<2, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<2, [MEC_RSV]>] >,
> +  // shift/rotate
> +  InstrItinData<IIC_SR, [InstrStage<1, [IEC_RSV0], 0>,
> +                   InstrStage<1, [MEC_RSV]>] >,
> +  // shift double
> +  InstrItinData<IIC_SHD16_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
> +  InstrItinData<IIC_SHD16_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
> +  InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
> +                   InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
> +                   InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
> +  InstrItinData<IIC_SHD32_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
> +  InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
> +                   InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
> +                   InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SHD64_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
> +  InstrItinData<IIC_SHD64_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
> +  InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
> +                   InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
> +                   InstrStage<4, [MEC_RSV]>] >,
> +  // cmov
> +  InstrItinData<IIC_CMOV16_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CMOV16_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_CMOV32_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CMOV32_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_CMOV64_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CMOV64_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // set
> +  InstrItinData<IIC_SET_M, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SET_R, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  // jcc
> +  InstrItinData<IIC_Jcc, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  // jcxz/jecxz/jrcxz
> +  InstrItinData<IIC_JCXZ, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  // jmp rel
> +  InstrItinData<IIC_JMP_REL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // jmp indirect
> +  InstrItinData<IIC_JMP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_JMP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  // jmp far
> +  InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  // loop/loope/loopne
> +  InstrItinData<IIC_LOOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LOOPE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LOOPNE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  // call - all but reg/imm
> +  InstrItinData<IIC_CALL_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_CALL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  //ret
> +  InstrItinData<IIC_RET, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_RET_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  //sign extension movs
> +  InstrItinData<IIC_MOVSX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  //zero extension movs
> +  InstrItinData<IIC_MOVZX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_REP_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_REP_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +
> +  // SSE binary operations
> +  // arithmetic fp scalar
> +  InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<3, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<3, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<3, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<3, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<3, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<3, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<2, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<2, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<13, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<13, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<13, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<13, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<13, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<13, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +
> +  InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<6, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<6, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<6, [MEC_RSV]>] >,
> +
> +  // arithmetic fp parallel
> +  InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<3, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<3, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<3, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<2, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<2, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<27, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<27, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<27, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<27, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<27, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<27, [MEC_RSV]>] >,
> +
> +  // bitwise parallel
> +  InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +
> +  // arithmetic int parallel
> +  InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +
> +  // multiply int parallel
> +  InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [FPC_RSV0], 0>,
> +                   InstrStage<5, [MEC_RSV]>] >,
> +
> +  // shift parallel
> +  InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<2, [FPC_RSV0], 0>,
> +                   InstrStage<2, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
> +
> +  InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
> +
> +  InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [FPC_RSV0], 0>,
> +                   InstrStage<1, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [FPC_RSV0]>] >,
> +
> +  InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<26, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<26, [FPC_RSV0], 0>,
> +                   InstrStage<26, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<13, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<13, [FPC_RSV0], 0>,
> +                   InstrStage<13, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<26, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<26, [FPC_RSV0], 0>,
> +                   InstrStage<26, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<13, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<13, [FPC_RSV0], 0>,
> +                   InstrStage<13, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<9, [FPC_RSV0], 0>,
> +                   InstrStage<9, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [FPC_RSV0], 0>,
> +                   InstrStage<4, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_MOVMSK, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MASKMOV, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1]>] >,
> +
> +  InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +
> +  InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +
> +  InstrItinData<IIC_SSE_LDDQU, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>]
> >,
> +
> +  InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +
> +  InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +
> +  InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PAUSE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_STMXCSR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<6, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<6, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<6, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<9, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<9, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<9, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<9, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<9, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<9, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<9, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<5, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MWAIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_SSE_MONITOR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +
> +  // conversions
> +  // to/from PD ...
> +  InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<5, [MEC_RSV]>] >,
> +  // to/from PS except to/from PD and PS2PI
> +  InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +
> +  // MMX MOVs
> +  InstrItinData<IIC_MMX_MOV_MM_RM,  [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  // other MMX
> +  InstrItinData<IIC_MMX_ALU_RM,  [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_ALU_RR,  [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PMUL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PSADBW,   [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PCK_RM,  [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PCK_RR,  [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PSHUF,   [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PEXTR,   [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_PINSRW,  [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  // conversions
> +  // from/to PD
> +  InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  // from/to PI
> +  InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_CMPX_LOCK, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_FILD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FLD,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FLD80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_FST,   [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FST80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FIST,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_FLDZ,   [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FUCOM,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FUCOMI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FCOMI,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FNSTSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FNSTCW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FLDCW,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FNINIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FFREE,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FNCLEX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_WAIT,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FXAM,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FNOP,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FLDL,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_F2XM1,  [InstrStage<88, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_FYL2X,  [InstrStage<296, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_FPTAN,  [InstrStage<281, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_FPATAN,  [InstrStage<296, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_FXTRACT,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_FPREM1,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_FPSTP,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FPREM,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_FYL2XP1,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_FSINCOS,  [InstrStage<281, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_FRNDINT,  [InstrStage<25, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_FSCALE,  [InstrStage<74, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_FCOMPP,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_FXSAVE,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_FXRSTOR,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_FXCH, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
> +
> +  // System instructions
> +  InstrItinData<IIC_CPUID, [InstrStage<60, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_INT,   [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_INT3,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_INVD,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_INVLPG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_IRET,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_HLT,   [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LXS,   [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LTR,   [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_RDTSC, [InstrStage<30, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_RSM,   [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_SIDT,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_SGDT,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_SLDT,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_STR,    [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_SWAPGS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_SYSCALL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_IN_RR,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_IN_RI,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_OUT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_OUT_IR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_INS,    [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_MOV_REG_DR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOV_DR_REG, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  // worst case for mov REG_CRx
> +  InstrItinData<IIC_MOV_REG_CR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOV_CR_REG, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOV_MEM_SR, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOV_SR_REG, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOV_SR_MEM, [InstrStage<1, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  // LAR
> +  InstrItinData<IIC_LAR_RM,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_LAR_RR,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // LSL
> +  InstrItinData<IIC_LSL_RM,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_LSL_RR,  [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +
> +  InstrItinData<IIC_LGDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LIDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LLDT_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_LLDT_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // push control register, segment registers
> +  InstrItinData<IIC_PUSH_CS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_PUSH_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // pop control register, segment registers
> +  InstrItinData<IIC_POP_SR,    [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_POP_SR_SS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // VERR, VERW
> +  InstrItinData<IIC_VERR,     [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_VERW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_VERW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  // WRMSR, RDMSR
> +  InstrItinData<IIC_WRMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_RDMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_RDPMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  // SMSW, LMSW
> +  InstrItinData<IIC_SMSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LMSW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_LMSW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +
> +  InstrItinData<IIC_ENTER, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_LEAVE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_POP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_POP_REG16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_POP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_POP_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_POP_FD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_POP_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_PUSH_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_PUSH_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_PUSH_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_PUSH_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +
> +  InstrItinData<IIC_BSWAP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<10, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<10, [MEC_RSV]>] >,
> +  InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<10, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_SCAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_CMPS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOV, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_MOV_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_AHF, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_BT_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_BT_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_BT_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_BT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_BTX_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_BTX_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_BTX_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_BTX_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_XCHG_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_XCHG_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<5, [MEC_RSV]>] >,
> +  InstrItinData<IIC_XADD_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_XADD_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<5, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_CMPXCHG_REG, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<6, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<6, [MEC_RSV]>] >,
> +  InstrItinData<IIC_CMPXCHG_8B, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_CMPXCHG_16B, [InstrStage<6, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_LODS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_OUTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_CLC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_CLD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_CLI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_CMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_CLTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_STC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_STI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_STD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_XLAT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_AAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_AAD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_AAM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_AAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_DAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_DAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_BOUND, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_ARPL_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_ARPL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_MOVBE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_AES, [InstrStage<8, [FPC_RSV0]>] >,
> +  InstrItinData<IIC_BLEND_NOMEM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_BLEND_MEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1],
> 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<10, [IEC_RSV0,
> IEC_RSV1], 0>,
> +                  InstrStage<10, [MEC_RSV]>] >,
> +  InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<10, [IEC_RSV0,
> IEC_RSV1]>] >,
> +  InstrItinData<IIC_CBW, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
> +  InstrItinData<IIC_CRC32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_CRC32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1],
> 0>,
> +                  InstrStage<3, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_DPPD_RR, [InstrStage<12, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_DPPD_RM, [InstrStage<12, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<12, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_DPPS_RR, [InstrStage<15, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_DPPS_RM, [InstrStage<15, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<15, [MEC_RSV]>] >,
> +  InstrItinData<IIC_MMX_EMMS, [InstrStage<10, [FPC_RSV0, FPC_RSV1]>]
> >,
> +  InstrItinData<IIC_SSE_EXTRACTPS_RR, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_EXTRACTPS_RM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_INSERTPS_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_INSERTPS_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_MPSADBW_RR, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_MPSADBW_RM, [InstrStage<1, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<1, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_PMULLD_RR, [InstrStage<11, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_PMULLD_RM, [InstrStage<11, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<11, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_ROUNDPS_REG, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_ROUNDPS_MEM, [InstrStage<5, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<5, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_ROUNDPD_REG, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_ROUNDPD_MEM, [InstrStage<4, [FPC_RSV0,
> FPC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_POPCNT_RR, [InstrStage<4, [IEC_RSV1]>] >,
> +  InstrItinData<IIC_SSE_POPCNT_RM, [InstrStage<4, [IEC_RSV1], 0>,
> +                  InstrStage<4, [MEC_RSV]>] >,
> +  InstrItinData<IIC_SSE_PCLMULQDQ_RR, [InstrStage<10, [IEC_RSV1]>]
> >,
> +  InstrItinData<IIC_SSE_PCLMULQDQ_RM, [InstrStage<10, [IEC_RSV1],
> 0>,
> +                  InstrStage<10, [MEC_RSV]>] >,
> +
> +  InstrItinData<IIC_NOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >
> +  ]>;
> +
> +// Silvermont machine model.
> +def SLMModel : SchedMachineModel {
> +  let IssueWidth = 2;  // Allows 2 instructions per scheduling
> group.
> +  let MinLatency = 1;  // InstrStage cycles overrides MinLatency.
> +                       // OperandCycles may be used for expected
> latency.
> +  let LoadLatency = 3; // Expected cycles, may be overridden by
> OperandCycles.
> +  let HighLatency = 30;// Expected, may be overridden by
> OperandCycles.
> +
> +  let Itineraries = SLMItineraries;
> +}
> 
> Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Fri Sep 13 14:23:28
> 2013
> @@ -281,7 +281,7 @@ void X86Subtarget::AutoDetectSubtargetFe
>        ToggleFeature(X86::FeatureFastUAMem);
>      }
>  
> -    // Set processor type. Currently only Atom is detected.
> +    // Set processor type. Currently only Atom or Silvermont (SLM)
> is detected.
>      if (Family == 6 &&
>          (Model == 28 || Model == 38 || Model == 39 ||
>           Model == 53 || Model == 54)) {
> @@ -290,6 +290,10 @@ void X86Subtarget::AutoDetectSubtargetFe
>        UseLeaForSP = true;
>        ToggleFeature(X86::FeatureLeaForSP);
>      }
> +    else if (Family == 6 &&
> +        (Model == 55 || Model == 74 || Model == 77)) {
> +      X86ProcFamily = IntelSLM;
> +    }
>  
>      unsigned MaxExtLevel;
>      X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX,
>      &EDX);
> @@ -451,7 +455,7 @@ void X86Subtarget::resetSubtargetFeature
>    // new MCSchedModel is used.
>    InitMCProcessorInfo(CPUName, FS);
>  
> -  if (X86ProcFamily == IntelAtom)
> +  if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
>      PostRAScheduler = true;
>  
>    InstrItins = getInstrItineraryForCPU(CPUName);
> 
> Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=190717&r1=190716&r2=190717&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
> +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Fri Sep 13 14:23:28 2013
> @@ -50,7 +50,7 @@ protected:
>    };
>  
>    enum X86ProcFamilyEnum {
> -    Others, IntelAtom
> +    Others, IntelAtom, IntelSLM
>    };
>  
>    /// X86ProcFamily - X86 processor family: Intel Atom, and others
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory



More information about the llvm-commits mailing list