[llvm] r328566 - [X86] Add WriteBitScan/WriteLZCNT/WriteTZCNT/WritePOPCNT scheduler classes (PR36881)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 26 11:19:28 PDT 2018


Author: rksimon
Date: Mon Mar 26 11:19:28 2018
New Revision: 328566

URL: http://llvm.org/viewvc/llvm-project?rev=328566&view=rev
Log:
[X86] Add WriteBitScan/WriteLZCNT/WriteTZCNT/WritePOPCNT scheduler classes (PR36881)

Give the bit count instructions their own scheduler classes instead of forcing them into existing classes.

These were mostly overridden by per-model InstRW entries anyway, but I had to add in costs from Agner Fog's instruction tables for Silvermont and znver1, and from the AMD Family 16h Software Optimization Guide (SoG) for btver2 (Jaguar).

Differential Revision: https://reviews.llvm.org/D44879

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
    llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll
    llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Mar 26 11:19:28 2018
@@ -1341,52 +1341,52 @@ let Defs = [EFLAGS] in {
 def BSF16rr  : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsf{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
-                  IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteShift]>;
+                  IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteBitScan]>;
 def BSF16rm  : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                  "bsf{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
-                  IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteShiftLd]>;
+                  IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteBitScanLd]>;
 def BSF32rr  : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))],
-                 IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteShift]>;
+                 IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteBitScan]>;
 def BSF32rm  : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
-                 IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteShiftLd]>;
+                 IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteBitScanLd]>;
 def BSF64rr  : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                   "bsf{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
-                  IIC_BIT_SCAN_REG>, PS, Sched<[WriteShift]>;
+                  IIC_BIT_SCAN_REG>, PS, Sched<[WriteBitScan]>;
 def BSF64rm  : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                   "bsf{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
-                  IIC_BIT_SCAN_MEM>, PS, Sched<[WriteShiftLd]>;
+                  IIC_BIT_SCAN_MEM>, PS, Sched<[WriteBitScanLd]>;
 
 def BSR16rr  : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))],
-                 IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteShift]>;
+                 IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteBitScan]>;
 def BSR16rm  : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
-                 IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteShiftLd]>;
+                 IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteBitScanLd]>;
 def BSR32rr  : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))],
-                 IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteShift]>;
+                 IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteBitScan]>;
 def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
-                 IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteShiftLd]>;
+                 IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteBitScanLd]>;
 def BSR64rr  : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                   "bsr{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))],
-                  IIC_BIT_SCAN_REG>, PS, Sched<[WriteShift]>;
+                  IIC_BIT_SCAN_REG>, PS, Sched<[WriteBitScan]>;
 def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                   "bsr{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
-                  IIC_BIT_SCAN_MEM>, PS, Sched<[WriteShiftLd]>;
+                  IIC_BIT_SCAN_MEM>, PS, Sched<[WriteBitScanLd]>;
 } // Defs = [EFLAGS]
 
 let SchedRW = [WriteMicrocoded] in {
@@ -2269,32 +2269,32 @@ let Predicates = [HasLZCNT], Defs = [EFL
   def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "lzcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)],
-                    IIC_LZCNT_RR>, XS, OpSize16, Sched<[WriteIMul]>;
+                    IIC_LZCNT_RR>, XS, OpSize16, Sched<[WriteLZCNT]>;
   def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "lzcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
                      (implicit EFLAGS)], IIC_LZCNT_RM>, XS, OpSize16,
-                    Sched<[WriteIMulLd]>;
+                    Sched<[WriteLZCNTLd]>;
 
   def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "lzcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)],
-                    IIC_LZCNT_RR>, XS, OpSize32, Sched<[WriteIMul]>;
+                    IIC_LZCNT_RR>, XS, OpSize32, Sched<[WriteLZCNT]>;
   def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "lzcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
                      (implicit EFLAGS)], IIC_LZCNT_RM>, XS, OpSize32,
-                    Sched<[WriteIMulLd]>;
+                    Sched<[WriteLZCNTLd]>;
 
   def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "lzcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)],
-                     IIC_LZCNT_RR>, XS, Sched<[WriteIMul]>;
+                     IIC_LZCNT_RR>, XS, Sched<[WriteLZCNT]>;
   def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "lzcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
                       (implicit EFLAGS)], IIC_LZCNT_RM>, XS,
-                     Sched<[WriteIMulLd]>;
+                     Sched<[WriteLZCNTLd]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2304,32 +2304,32 @@ let Predicates = [HasBMI], Defs = [EFLAG
   def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "tzcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)],
-                    IIC_TZCNT_RR>, XS, OpSize16, Sched<[WriteIMul]>;
+                    IIC_TZCNT_RR>, XS, OpSize16, Sched<[WriteTZCNT]>;
   def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "tzcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (cttz (loadi16 addr:$src))),
                      (implicit EFLAGS)], IIC_TZCNT_RM>, XS, OpSize16,
-                    Sched<[WriteIMulLd]>;
+                    Sched<[WriteTZCNTLd]>;
 
   def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "tzcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)],
-                    IIC_TZCNT_RR>, XS, OpSize32, Sched<[WriteIMul]>;
+                    IIC_TZCNT_RR>, XS, OpSize32, Sched<[WriteTZCNT]>;
   def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "tzcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (cttz (loadi32 addr:$src))),
                      (implicit EFLAGS)], IIC_TZCNT_RM>, XS, OpSize32,
-                    Sched<[WriteIMulLd]>;
+                    Sched<[WriteTZCNTLd]>;
 
   def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "tzcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)],
-                     IIC_TZCNT_RR>, XS, Sched<[WriteIMul]>;
+                     IIC_TZCNT_RR>, XS, Sched<[WriteTZCNT]>;
   def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "tzcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (cttz (loadi64 addr:$src))),
                       (implicit EFLAGS)], IIC_TZCNT_RM>, XS,
-                     Sched<[WriteIMulLd]>;
+                     Sched<[WriteTZCNTLd]>;
 }
 
 multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Mar 26 11:19:28 2018
@@ -6227,35 +6227,35 @@ let Defs = [EFLAGS], Predicates = [HasPO
   def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                      "popcnt{w}\t{$src, $dst|$dst, $src}",
                      [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
-                     IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>,
+                     IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>,
                      OpSize16, XS;
   def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                      "popcnt{w}\t{$src, $dst|$dst, $src}",
                      [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                       (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
-                      Sched<[WriteFAddLd]>, OpSize16, XS;
+                      Sched<[WritePOPCNTLd]>, OpSize16, XS;
 
   def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                      "popcnt{l}\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
-                     IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>,
+                     IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>,
                      OpSize32, XS;
 
   def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                      "popcnt{l}\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                       (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
-                      Sched<[WriteFAddLd]>, OpSize32, XS;
+                      Sched<[WritePOPCNTLd]>, OpSize32, XS;
 
   def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                       "popcnt{q}\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
-                      IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>, XS;
+                      IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>, XS;
   def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                       "popcnt{q}\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                        (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
-                       Sched<[WriteFAddLd]>, XS;
+                       Sched<[WritePOPCNTLd]>, XS;
 }
 
 // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Mon Mar 26 11:19:28 2018
@@ -110,6 +110,12 @@ def : WriteRes<WriteIMulH, []> { let Lat
 
 def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
 
+// Bit counts.
+defm : BWWriteResPair<WriteBitScan, [BWPort1], 3>;
+defm : BWWriteResPair<WriteLZCNT,   [BWPort1], 3>;
+defm : BWWriteResPair<WriteTZCNT,   [BWPort1], 3>;
+defm : BWWriteResPair<WritePOPCNT,  [BWPort1], 3>;
+
 // Integer shifts and rotates.
 defm : BWWriteResPair<WriteShift, [BWPort06],  1>;
 
@@ -851,13 +857,9 @@ def: InstRW<[BWWriteResGroup27], (instrs
 def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0",
                                             "ADD_FST0r",
                                             "ADD_FrST0",
-                                            "BSF(16|32|64)rr",
-                                            "BSR(16|32|64)rr",
-                                            "LZCNT(16|32|64)rr",
                                             "MMX_CVTPI2PSirr",
                                             "PDEP(32|64)rr",
                                             "PEXT(32|64)rr",
-                                            "POPCNT(16|32|64)rr",
                                             "SHLD(16|32|64)rri8",
                                             "SHRD(16|32|64)rri8",
                                             "SUBR_FPrST0",
@@ -866,7 +868,6 @@ def: InstRW<[BWWriteResGroup27], (instre
                                             "SUB_FPrST0",
                                             "SUB_FST0r",
                                             "SUB_FrST0",
-                                            "TZCNT(16|32|64)rr",
                                             "(V?)ADDPD(Y?)rr",
                                             "(V?)ADDPS(Y?)rr",
                                             "(V?)ADDSDrr",
@@ -1889,16 +1890,11 @@ def BWWriteResGroup91 : SchedWriteRes<[B
 }
 def: InstRW<[BWWriteResGroup91], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi8, IMUL64rmi32)>;
 def: InstRW<[BWWriteResGroup91], (instrs IMUL8m, MUL8m)>;
-def: InstRW<[BWWriteResGroup91], (instregex "BSF(16|32|64)rm",
-                                            "BSR(16|32|64)rm",
-                                            "LZCNT(16|32|64)rm",
-                                            "MMX_CVTPI2PSirm",
+def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm",
                                             "MMX_CVTPS2PIirm",
                                             "MMX_CVTTPS2PIirm",
                                             "PDEP(32|64)rm",
                                             "PEXT(32|64)rm",
-                                            "POPCNT(16|32|64)rm",
-                                            "TZCNT(16|32|64)rm",
                                             "(V?)ADDPDrm",
                                             "(V?)ADDPSrm",
                                             "(V?)ADDSDrm",

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Mon Mar 26 11:19:28 2018
@@ -121,6 +121,12 @@ defm : HWWriteResPair<WriteJump,  [HWPor
 // the port to read all inputs. We don't model that.
 def : WriteRes<WriteLEA, [HWPort15]>;
 
+// Bit counts.
+defm : HWWriteResPair<WriteBitScan, [HWPort1], 3>;
+defm : HWWriteResPair<WriteLZCNT,   [HWPort1], 3>;
+defm : HWWriteResPair<WriteTZCNT,   [HWPort1], 3>;
+defm : HWWriteResPair<WritePOPCNT,  [HWPort1], 3>;
+
 // This is quite rough, latency depends on the dividend.
 defm : HWWriteResPair<WriteIDiv,  [HWPort0, HWDivider], 25, [1,10], 1, 4>;
 // Scalar and vector floating point.
@@ -1042,20 +1048,15 @@ def HWWriteResGroup12 : SchedWriteRes<[H
 def: InstRW<[HWWriteResGroup12], (instrs MUL8m, MUL16m,
                                          IMUL8m, IMUL16m,
                                          IMUL16rm, IMUL16rmi, IMUL16rmi8, IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>;
-def: InstRW<[HWWriteResGroup12], (instregex "BSF(16|32|64)rm",
-                                            "BSR(16|32|64)rm",
-                                            "FCOM32m",
+def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m",
                                             "FCOM64m",
                                             "FCOMP32m",
                                             "FCOMP64m",
-                                            "LZCNT(16|32|64)rm",
                                             "MMX_CVTPI2PSirm",
                                             "MMX_CVTPS2PIirm",
                                             "MMX_CVTTPS2PIirm",
                                             "PDEP(32|64)rm",
                                             "PEXT(32|64)rm",
-                                            "POPCNT(16|32|64)rm",
-                                            "TZCNT(16|32|64)rm",
                                             "(V?)ADDSDrm",
                                             "(V?)ADDSSrm",
                                             "(V?)CMPSDrm",
@@ -1779,13 +1780,9 @@ def: InstRW<[HWWriteResGroup50], (instrs
 def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0",
                                             "ADD_FST0r",
                                             "ADD_FrST0",
-                                            "BSF(16|32|64)rr",
-                                            "BSR(16|32|64)rr",
-                                            "LZCNT(16|32|64)rr",
                                             "MMX_CVTPI2PSirr",
                                             "PDEP(32|64)rr",
                                             "PEXT(32|64)rr",
-                                            "POPCNT(16|32|64)rr",
                                             "SHLD(16|32|64)rri8",
                                             "SHRD(16|32|64)rri8",
                                             "SUBR_FPrST0",
@@ -1794,7 +1791,6 @@ def: InstRW<[HWWriteResGroup50], (instre
                                             "SUB_FPrST0",
                                             "SUB_FST0r",
                                             "SUB_FrST0",
-                                            "TZCNT(16|32|64)rr",
                                             "(V?)ADDPD(Y?)rr",
                                             "(V?)ADDPS(Y?)rr",
                                             "(V?)ADDSDrr",

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Mon Mar 26 11:19:28 2018
@@ -112,6 +112,12 @@ defm : SBWriteResPair<WriteJump,  [SBPor
 // the port to read all inputs. We don't model that.
 def : WriteRes<WriteLEA, [SBPort15]>;
 
+// Bit counts.
+defm : SBWriteResPair<WriteBitScan, [SBPort1], 3, [1], 1, 5>;
+defm : SBWriteResPair<WriteLZCNT,   [SBPort1], 3, [1], 1, 5>;
+defm : SBWriteResPair<WriteTZCNT,   [SBPort1], 3, [1], 1, 5>;
+defm : SBWriteResPair<WritePOPCNT,  [SBPort1], 3, [1], 1, 5>;
+
 // Scalar and vector floating point.
 def  : WriteRes<WriteFStore,       [SBPort23, SBPort4]>;
 def  : WriteRes<WriteFLoad,        [SBPort23]> { let Latency = 6; }
@@ -672,8 +678,6 @@ def: InstRW<[SBWriteResGroup21], (instrs
 def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0",
                                             "ADD_FST0r",
                                             "ADD_FrST0",
-                                            "BSF(16|32|64)rr",
-                                            "BSR(16|32|64)rr",
                                             "CRC32r(16|32|64)r8",
                                             "CRC32r(16|32|64)r64",
                                             "MMX_CVTPI2PSirr",
@@ -1412,9 +1416,7 @@ def SBWriteResGroup72 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup72], (instregex "BSF(16|32|64)rm",
-                                            "BSR(16|32|64)rm",
-                                            "CRC32r(16|32|64)m64",
+def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64",
                                             "CRC32r(16|32|64)m8",
                                             "FCOM32m",
                                             "FCOM64m",

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Mon Mar 26 11:19:28 2018
@@ -110,6 +110,12 @@ defm : SKLWriteResPair<WriteIDiv, [SKLPo
 def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
 def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
 
+// Bit counts.
+defm : SKLWriteResPair<WriteBitScan, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteLZCNT,   [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteTZCNT,   [SKLPort1], 3>;
+defm : SKLWriteResPair<WritePOPCNT,  [SKLPort1], 3>;
+
 // Integer shifts and rotates.
 defm : SKLWriteResPair<WriteShift, [SKLPort06],  1>;
 
@@ -862,15 +868,10 @@ def SKLWriteResGroup29 : SchedWriteRes<[
 }
 def: InstRW<[SKLWriteResGroup29], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
 def: InstRW<[SKLWriteResGroup29], (instrs IMUL8r, MUL8r)>;
-def: InstRW<[SKLWriteResGroup29], (instregex "BSF(16|32|64)rr",
-                                             "BSR(16|32|64)rr",
-                                             "LZCNT(16|32|64)rr",
-                                             "PDEP(32|64)rr",
+def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
                                              "PEXT(32|64)rr",
-                                             "POPCNT(16|32|64)rr",
                                              "SHLD(16|32|64)rri8",
-                                             "SHRD(16|32|64)rri8",
-                                             "TZCNT(16|32|64)rr")>;
+                                             "SHRD(16|32|64)rri8")>;
 
 def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
   let Latency = 3;
@@ -1874,13 +1875,8 @@ def SKLWriteResGroup107 : SchedWriteRes<
 }
 def: InstRW<[SKLWriteResGroup107], (instrs IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>;
 def: InstRW<[SKLWriteResGroup107], (instrs IMUL8m, MUL8m)>;
-def: InstRW<[SKLWriteResGroup107], (instregex "BSF(16|32|64)rm",
-                                              "BSR(16|32|64)rm",
-                                              "LZCNT(16|32|64)rm",
-                                              "PDEP(32|64)rm",
-                                              "PEXT(32|64)rm",
-                                              "POPCNT(16|32|64)rm",
-                                              "TZCNT(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm",
+                                              "PEXT(32|64)rm")>;
 
 def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
   let Latency = 8;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Mon Mar 26 11:19:28 2018
@@ -113,6 +113,12 @@ def : WriteRes<WriteLEA, [SKXPort15]>; /
 // Integer shifts and rotates.
 defm : SKXWriteResPair<WriteShift, [SKXPort06],  1>;
 
+// Bit counts.
+defm : SKXWriteResPair<WriteBitScan, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteLZCNT,   [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteTZCNT,   [SKXPort1], 3>;
+defm : SKXWriteResPair<WritePOPCNT,  [SKXPort1], 3>;
+
 // Loads, stores, and moves, not folded with other operations.
 def : WriteRes<WriteLoad,  [SKXPort23]> { let Latency = 5; }
 def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>;
@@ -1703,15 +1709,10 @@ def SKXWriteResGroup31 : SchedWriteRes<[
 }
 def: InstRW<[SKXWriteResGroup31], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
 def: InstRW<[SKXWriteResGroup31], (instrs IMUL8r, MUL8r)>;
-def: InstRW<[SKXWriteResGroup31], (instregex "BSF(16|32|64)rr",
-                                             "BSR(16|32|64)rr",
-                                             "LZCNT(16|32|64)rr",
-                                             "PDEP(32|64)rr",
+def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
                                              "PEXT(32|64)rr",
-                                             "POPCNT(16|32|64)rr",
                                              "SHLD(16|32|64)rri8",
-                                             "SHRD(16|32|64)rri8",
-                                             "TZCNT(16|32|64)rr")>;
+                                             "SHRD(16|32|64)rri8")>;
 
 def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
   let Latency = 3;
@@ -3901,13 +3902,8 @@ def SKXWriteResGroup118 : SchedWriteRes<
 }
 def: InstRW<[SKXWriteResGroup118], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>;
 def: InstRW<[SKXWriteResGroup118], (instrs IMUL8m, MUL8m)>;
-def: InstRW<[SKXWriteResGroup118], (instregex "BSF(16|32|64)rm",
-                                              "BSR(16|32|64)rm",
-                                              "LZCNT(16|32|64)rm",
-                                              "PDEP(32|64)rm",
-                                              "PEXT(32|64)rm",
-                                              "POPCNT(16|32|64)rm",
-                                              "TZCNT(16|32|64)rm")>;
+def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
+                                              "PEXT(32|64)rm")>;
 
 def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
   let Latency = 8;

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Mon Mar 26 11:19:28 2018
@@ -46,6 +46,11 @@ def  WriteIMulH : SchedWrite;       // I
 defm WriteIDiv : X86SchedWritePair; // Integer division.
 def  WriteLEA  : SchedWrite;        // LEA instructions can't fold loads.
 
+defm WriteBitScan : X86SchedWritePair; // Bit scan forward/reverse.
+defm WritePOPCNT : X86SchedWritePair; // Bit population count.
+defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
+defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
+
 // Integer shifts and rotates.
 defm WriteShift : X86SchedWritePair;
 

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Mon Mar 26 11:19:28 2018
@@ -134,27 +134,11 @@ def  : WriteRes<WriteIMulH, [JALU1]> {
 // FIXME: SAGU 3-operand LEA
 def : WriteRes<WriteLEA, [JALU01]>;
 
-// FIXME: Why do bitcounts use WriteIMul?
-def JWriteLZCNT : SchedWriteRes<[JALU01]> {
-}
-def JWriteLZCNTLd : SchedWriteRes<[JLAGU, JALU01]> {
-  let Latency = 4;
-}
-def : InstRW<[JWriteLZCNT], (instrs LZCNT16rr, LZCNT32rr, LZCNT64rr,
-                                    POPCNT16rr, POPCNT32rr, POPCNT64rr)>;
-def : InstRW<[JWriteLZCNTLd], (instrs LZCNT16rm, LZCNT32rm, LZCNT64rm,
-                                    POPCNT16rm, POPCNT32rm, POPCNT64rm)>;
-
-def JWriteTZCNT : SchedWriteRes<[JALU01]> {
-  let Latency = 2;
-  let ResourceCycles = [2];
-}
-def JWriteTZCNTLd : SchedWriteRes<[JLAGU, JALU01]> {
-  let Latency = 5;
-  let ResourceCycles = [1, 2];
-}
-def : InstRW<[JWriteTZCNT], (instrs TZCNT16rr, TZCNT32rr, TZCNT64rr)>;
-def : InstRW<[JWriteTZCNTLd], (instrs TZCNT16rm, TZCNT32rm, TZCNT64rm)>;
+// Bit counts.
+defm : JWriteResIntPair<WriteBitScan, [JALU01], 5, [4], 8>;
+defm : JWriteResIntPair<WritePOPCNT,  [JALU01], 1>;
+defm : JWriteResIntPair<WriteLZCNT,   [JALU01], 1>;
+defm : JWriteResIntPair<WriteTZCNT,   [JALU01], 2, [2]>;
 
 def JWriteIMul64 : SchedWriteRes<[JALU1, JMul]> {
   let Latency = 6;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Mon Mar 26 11:19:28 2018
@@ -97,6 +97,12 @@ defm : SLMWriteResPair<WriteJump,  [SLM_
 // the port to read all inputs. We don't model that.
 def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
 
+// Bit counts.
+defm : SLMWriteResPair<WriteBitScan, [SLM_IEC_RSV01], 10, [20], 10>;
+defm : SLMWriteResPair<WriteLZCNT,   [SLM_IEC_RSV0], 3>;
+defm : SLMWriteResPair<WriteTZCNT,   [SLM_IEC_RSV0], 3>;
+defm : SLMWriteResPair<WritePOPCNT,  [SLM_IEC_RSV0], 3>;
+
 // This is quite rough, latency depends on the dividend.
 defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
 

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Mon Mar 26 11:19:28 2018
@@ -152,6 +152,12 @@ defm : ZnWriteResPair<WriteIMul,   [ZnAL
 defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
 defm : ZnWriteResPair<WriteJump,  [ZnALU], 1>;
 
+// Bit counts.
+defm : ZnWriteResPair<WriteBitScan, [ZnALU], 3>;
+defm : ZnWriteResPair<WriteLZCNT,   [ZnALU], 2>;
+defm : ZnWriteResPair<WriteTZCNT,   [ZnALU], 2>;
+defm : ZnWriteResPair<WritePOPCNT,  [ZnALU], 1>;
+
 // Treat misc copies as a move.
 def : InstRW<[WriteMove], (instrs COPY)>;
 
@@ -522,19 +528,6 @@ def ZnWriteALULat2Ld : SchedWriteRes<[Zn
   let Latency = 6;
 }
 
-def ZnWriteALULat3 : SchedWriteRes<[ZnALU]> {
-  let Latency = 3;
-}
-def ZnWriteALULat3Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
-  let Latency = 7;
-}
-
-// BSF BSR.
-// r,r.
-def : InstRW<[ZnWriteALULat3], (instregex "BS(R|F)(16|32|64)rr")>;
-// r,m.
-def : InstRW<[ZnWriteALULat3Ld, ReadAfterLd], (instregex "BS(R|F)(16|32|64)rm")>;
-
 // BT.
 // r,r/i.
 def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;
@@ -630,12 +623,6 @@ def : InstRW<[WriteShift],
 def : InstRW<[WriteShift],
              (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;
 
-// LZCNT TZCNT.
-// r,r.
-def : InstRW<[ZnWriteALULat2], (instregex "(LZCNT|TZCNT)(16|32|64)rr")>;
-// r,m.
-def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "(LZCNT|TZCNT)(16|32|64)rm")>;
-
 //-- Misc instructions --//
 // CMPXCHG.
 def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> {

Modified: llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-schedule.ll?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-schedule.ll Mon Mar 26 11:19:28 2018
@@ -1,732 +1,732 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2  | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1  | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) {
-; GENERIC-LABEL: test_andn_i16:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
-; GENERIC-NEXT:    andw (%rdx), %di # sched: [6:0.50]
-; GENERIC-NEXT:    addl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andn_i16:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
-; HASWELL-NEXT:    notl %edi # sched: [1:0.25]
-; HASWELL-NEXT:    andw (%rdx), %di # sched: [6:0.50]
-; HASWELL-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andn_i16:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT:    notl %edi # sched: [1:0.25]
-; BROADWELL-NEXT:    andw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andn_i16:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT:    notl %edi # sched: [1:0.25]
-; SKYLAKE-NEXT:    andw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_andn_i16:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    notl %edi # sched: [1:0.50]
-; BTVER2-NEXT:    andw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT:    addl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andn_i16:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    notl %edi # sched: [1:0.25]
-; ZNVER1-NEXT:    andw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i16, i16 *%a2
-  %2 = xor i16 %a0, -1
-  %3 = and i16 %2, %a1
-  %4 = and i16 %2, %1
-  %5 = add i16 %3, %4
-  ret i16 %5
-}
-
-define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_andn_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    andnl (%rdx), %edi, %eax # sched: [5:0.50]
-; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andn_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
-; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andn_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
-; BROADWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
-; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andn_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
-; SKYLAKE-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
-; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_andn_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    andnl (%rdx), %edi, %eax # sched: [4:1.00]
-; BTVER2-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andn_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    andnl (%rdx), %edi, %eax # sched: [5:0.50]
-; ZNVER1-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a2
-  %2 = xor i32 %a0, -1
-  %3 = and i32 %2, %a1
-  %4 = and i32 %2, %1
-  %5 = add i32 %3, %4
-  ret i32 %5
-}
-
-define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_andn_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT:    andnq (%rdx), %rdi, %rax # sched: [5:0.50]
-; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andn_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
-; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andn_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BROADWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
-; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andn_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; SKYLAKE-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
-; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_andn_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    andnq (%rdx), %rdi, %rax # sched: [4:1.00]
-; BTVER2-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andn_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    andnq (%rdx), %rdi, %rax # sched: [5:0.50]
-; ZNVER1-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.25]
-; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a2
-  %2 = xor i64 %a0, -1
-  %3 = and i64 %2, %a1
-  %4 = and i64 %2, %1
-  %5 = add i64 %3, %4
-  ret i64 %5
-}
-
-define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_bextr_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [5:0.50]
-; GENERIC-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bextr_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
-; HASWELL-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
-; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bextr_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
-; BROADWELL-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
-; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bextr_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
-; SKYLAKE-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
-; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_bextr_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [4:1.00]
-; BTVER2-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bextr_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [5:0.50]
-; ZNVER1-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a2
-  %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %1, i32 %a0)
-  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a1, i32 %a0)
-  %4 = add i32 %2, %3
-  ret i32 %4
-}
-declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
-
-define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_bextr_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [5:0.50]
-; GENERIC-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bextr_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
-; HASWELL-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
-; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bextr_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
-; BROADWELL-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
-; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bextr_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
-; SKYLAKE-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
-; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_bextr_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [4:1.00]
-; BTVER2-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bextr_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [5:0.50]
-; ZNVER1-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a2
-  %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %1, i64 %a0)
-  %3 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a1, i64 %a0)
-  %4 = add i64 %2, %3
-  ret i64 %4
-}
-declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
-
-define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_blsi_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsil (%rsi), %ecx # sched: [5:0.50]
-; GENERIC-NEXT:    blsil %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsi_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
-; HASWELL-NEXT:    blsil %edi, %eax # sched: [1:0.50]
-; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsi_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
-; BROADWELL-NEXT:    blsil %edi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsi_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
-; SKYLAKE-NEXT:    blsil %edi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_blsi_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    blsil (%rsi), %ecx # sched: [4:1.00]
-; BTVER2-NEXT:    blsil %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsi_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT:    blsil %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a1
-  %2 = sub i32 0, %1
-  %3 = sub i32 0, %a0
-  %4 = and i32 %1, %2
-  %5 = and i32 %a0, %3
-  %6 = add i32 %4, %5
-  ret i32 %6
-}
-
-define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_blsi_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsiq (%rsi), %rcx # sched: [5:0.50]
-; GENERIC-NEXT:    blsiq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsi_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
-; HASWELL-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
-; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsi_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
-; BROADWELL-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
-; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsi_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
-; SKYLAKE-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
-; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_blsi_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    blsiq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsi_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT:    blsiq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a1
-  %2 = sub i64 0, %1
-  %3 = sub i64 0, %a0
-  %4 = and i64 %1, %2
-  %5 = and i64 %a0, %3
-  %6 = add i64 %4, %5
-  ret i64 %6
-}
-
-define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_blsmsk_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsmskl (%rsi), %ecx # sched: [5:0.50]
-; GENERIC-NEXT:    blsmskl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsmsk_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
-; HASWELL-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
-; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsmsk_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
-; BROADWELL-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsmsk_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
-; SKYLAKE-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_blsmsk_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    blsmskl (%rsi), %ecx # sched: [4:1.00]
-; BTVER2-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsmsk_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT:    blsmskl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a1
-  %2 = sub i32 %1, 1
-  %3 = sub i32 %a0, 1
-  %4 = xor i32 %1, %2
-  %5 = xor i32 %a0, %3
-  %6 = add i32 %4, %5
-  ret i32 %6
-}
-
-define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_blsmsk_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsmskq (%rsi), %rcx # sched: [5:0.50]
-; GENERIC-NEXT:    blsmskq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsmsk_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
-; HASWELL-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
-; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsmsk_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
-; BROADWELL-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
-; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsmsk_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
-; SKYLAKE-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
-; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_blsmsk_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    blsmskq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsmsk_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT:    blsmskq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a1
-  %2 = sub i64 %1, 1
-  %3 = sub i64 %a0, 1
-  %4 = xor i64 %1, %2
-  %5 = xor i64 %a0, %3
-  %6 = add i64 %4, %5
-  ret i64 %6
-}
-
-define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_blsr_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsrl (%rsi), %ecx # sched: [5:0.50]
-; GENERIC-NEXT:    blsrl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsr_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
-; HASWELL-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
-; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsr_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
-; BROADWELL-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsr_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
-; SKYLAKE-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_blsr_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    blsrl (%rsi), %ecx # sched: [4:1.00]
-; BTVER2-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsr_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT:    blsrl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a1
-  %2 = sub i32 %1, 1
-  %3 = sub i32 %a0, 1
-  %4 = and i32 %1, %2
-  %5 = and i32 %a0, %3
-  %6 = add i32 %4, %5
-  ret i32 %6
-}
-
-define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_blsr_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    blsrq (%rsi), %rcx # sched: [5:0.50]
-; GENERIC-NEXT:    blsrq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsr_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
-; HASWELL-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
-; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsr_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
-; BROADWELL-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
-; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsr_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
-; SKYLAKE-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
-; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_blsr_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    blsrq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsr_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT:    blsrq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a1
-  %2 = sub i64 %1, 1
-  %3 = sub i64 %a0, 1
-  %4 = and i64 %1, %2
-  %5 = and i64 %a0, %3
-  %6 = add i64 %4, %5
-  ret i64 %6
-}
-
-define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
-; GENERIC-LABEL: test_cttz_i16:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    tzcntw (%rsi), %cx # sched: [7:1.00]
-; GENERIC-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cttz_i16:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cttz_i16:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cttz_i16:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_cttz_i16:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    tzcntw (%rsi), %cx # sched: [5:1.00]
-; BTVER2-NEXT:    tzcntw %di, %ax # sched: [2:1.00]
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cttz_i16:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    tzcntw (%rsi), %cx # sched: [6:0.50]
-; ZNVER1-NEXT:    tzcntw %di, %ax # sched: [2:0.25]
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i16, i16 *%a1
-  %2 = tail call i16 @llvm.cttz.i16( i16 %1, i1 false )
-  %3 = tail call i16 @llvm.cttz.i16( i16 %a0, i1 false )
-  %4 = or i16 %2, %3
-  ret i16 %4
-}
-declare i16 @llvm.cttz.i16(i16, i1)
-
-define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_cttz_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    tzcntl (%rsi), %ecx # sched: [7:1.00]
-; GENERIC-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cttz_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cttz_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cttz_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_cttz_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    tzcntl (%rsi), %ecx # sched: [5:1.00]
-; BTVER2-NEXT:    tzcntl %edi, %eax # sched: [2:1.00]
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cttz_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    tzcntl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT:    tzcntl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a1
-  %2 = tail call i32 @llvm.cttz.i32( i32 %1, i1 false )
-  %3 = tail call i32 @llvm.cttz.i32( i32 %a0, i1 false )
-  %4 = or i32 %2, %3
-  ret i32 %4
-}
-declare i32 @llvm.cttz.i32(i32, i1)
-
-define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_cttz_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    tzcntq (%rsi), %rcx # sched: [7:1.00]
-; GENERIC-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cttz_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cttz_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cttz_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_cttz_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    tzcntq (%rsi), %rcx # sched: [5:1.00]
-; BTVER2-NEXT:    tzcntq %rdi, %rax # sched: [2:1.00]
-; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cttz_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    tzcntq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT:    tzcntq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a1
-  %2 = tail call i64 @llvm.cttz.i64( i64 %1, i1 false )
-  %3 = tail call i64 @llvm.cttz.i64( i64 %a0, i1 false )
-  %4 = or i64 %2, %3
-  ret i64 %4
-}
-declare i64 @llvm.cttz.i64(i64, i1)
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2  | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1  | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+
+define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) {
+; GENERIC-LABEL: test_andn_i16:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
+; GENERIC-NEXT:    andw (%rdx), %di # sched: [6:0.50]
+; GENERIC-NEXT:    addl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_andn_i16:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
+; HASWELL-NEXT:    notl %edi # sched: [1:0.25]
+; HASWELL-NEXT:    andw (%rdx), %di # sched: [6:0.50]
+; HASWELL-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_andn_i16:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
+; BROADWELL-NEXT:    notl %edi # sched: [1:0.25]
+; BROADWELL-NEXT:    andw (%rdx), %di # sched: [6:0.50]
+; BROADWELL-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_andn_i16:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
+; SKYLAKE-NEXT:    notl %edi # sched: [1:0.25]
+; SKYLAKE-NEXT:    andw (%rdx), %di # sched: [6:0.50]
+; SKYLAKE-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_andn_i16:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    notl %edi # sched: [1:0.50]
+; BTVER2-NEXT:    andw (%rdx), %di # sched: [4:1.00]
+; BTVER2-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_andn_i16:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    notl %edi # sched: [1:0.25]
+; ZNVER1-NEXT:    andw (%rdx), %di # sched: [5:0.50]
+; ZNVER1-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i16, i16 *%a2
+  %2 = xor i16 %a0, -1
+  %3 = and i16 %2, %a1
+  %4 = and i16 %2, %1
+  %5 = add i16 %3, %4
+  ret i16 %5
+}
+
+define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
+; GENERIC-LABEL: test_andn_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    andnl (%rdx), %edi, %eax # sched: [5:0.50]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_andn_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
+; HASWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
+; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_andn_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
+; BROADWELL-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
+; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_andn_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
+; SKYLAKE-NEXT:    andnl (%rdx), %edi, %eax # sched: [6:0.50]
+; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_andn_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    andnl (%rdx), %edi, %eax # sched: [4:1.00]
+; BTVER2-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_andn_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    andnl (%rdx), %edi, %eax # sched: [5:0.50]
+; ZNVER1-NEXT:    andnl %esi, %edi, %ecx # sched: [1:0.25]
+; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a2
+  %2 = xor i32 %a0, -1
+  %3 = and i32 %2, %a1
+  %4 = and i32 %2, %1
+  %5 = add i32 %3, %4
+  ret i32 %5
+}
+
+define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
+; GENERIC-LABEL: test_andn_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.33]
+; GENERIC-NEXT:    andnq (%rdx), %rdi, %rax # sched: [5:0.50]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_andn_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
+; HASWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
+; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_andn_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
+; BROADWELL-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
+; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_andn_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
+; SKYLAKE-NEXT:    andnq (%rdx), %rdi, %rax # sched: [6:0.50]
+; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_andn_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    andnq (%rdx), %rdi, %rax # sched: [4:1.00]
+; BTVER2-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.50]
+; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_andn_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    andnq (%rdx), %rdi, %rax # sched: [5:0.50]
+; ZNVER1-NEXT:    andnq %rsi, %rdi, %rcx # sched: [1:0.25]
+; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a2
+  %2 = xor i64 %a0, -1
+  %3 = and i64 %2, %a1
+  %4 = and i64 %2, %1
+  %5 = add i64 %3, %4
+  ret i64 %5
+}
+
+define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
+; GENERIC-LABEL: test_bextr_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_bextr_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
+; HASWELL-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
+; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_bextr_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
+; BROADWELL-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
+; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_bextr_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
+; SKYLAKE-NEXT:    bextrl %edi, %esi, %eax # sched: [2:0.50]
+; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_bextr_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_bextr_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    bextrl %edi, (%rdx), %ecx # sched: [5:0.50]
+; ZNVER1-NEXT:    bextrl %edi, %esi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a2
+  %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %1, i32 %a0)
+  %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a1, i32 %a0)
+  %4 = add i32 %2, %3
+  ret i32 %4
+}
+declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
+
+define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
+; GENERIC-LABEL: test_bextr_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_bextr_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
+; HASWELL-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
+; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_bextr_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
+; BROADWELL-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
+; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_bextr_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
+; SKYLAKE-NEXT:    bextrq %rdi, %rsi, %rax # sched: [2:0.50]
+; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_bextr_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_bextr_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    bextrq %rdi, (%rdx), %rcx # sched: [5:0.50]
+; ZNVER1-NEXT:    bextrq %rdi, %rsi, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a2
+  %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %1, i64 %a0)
+  %3 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a1, i64 %a0)
+  %4 = add i64 %2, %3
+  ret i64 %4
+}
+declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
+
+define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_blsi_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    blsil (%rsi), %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    blsil %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_blsi_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
+; HASWELL-NEXT:    blsil %edi, %eax # sched: [1:0.50]
+; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_blsi_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
+; BROADWELL-NEXT:    blsil %edi, %eax # sched: [1:0.50]
+; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_blsi_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
+; SKYLAKE-NEXT:    blsil %edi, %eax # sched: [1:0.50]
+; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_blsi_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    blsil (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    blsil %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_blsi_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    blsil (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT:    blsil %edi, %eax # sched: [2:0.25]
+; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a1
+  %2 = sub i32 0, %1
+  %3 = sub i32 0, %a0
+  %4 = and i32 %1, %2
+  %5 = and i32 %a0, %3
+  %6 = add i32 %4, %5
+  ret i32 %6
+}
+
+define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_blsi_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    blsiq (%rsi), %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    blsiq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_blsi_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
+; HASWELL-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
+; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_blsi_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
+; BROADWELL-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
+; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_blsi_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
+; SKYLAKE-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
+; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_blsi_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    blsiq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    blsiq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_blsi_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    blsiq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT:    blsiq %rdi, %rax # sched: [2:0.25]
+; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a1
+  %2 = sub i64 0, %1
+  %3 = sub i64 0, %a0
+  %4 = and i64 %1, %2
+  %5 = and i64 %a0, %3
+  %6 = add i64 %4, %5
+  ret i64 %6
+}
+
+define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_blsmsk_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    blsmskl (%rsi), %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    blsmskl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_blsmsk_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
+; HASWELL-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
+; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_blsmsk_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
+; BROADWELL-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
+; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_blsmsk_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
+; SKYLAKE-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
+; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_blsmsk_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    blsmskl (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    blsmskl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_blsmsk_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    blsmskl (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT:    blsmskl %edi, %eax # sched: [2:0.25]
+; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a1
+  %2 = sub i32 %1, 1
+  %3 = sub i32 %a0, 1
+  %4 = xor i32 %1, %2
+  %5 = xor i32 %a0, %3
+  %6 = add i32 %4, %5
+  ret i32 %6
+}
+
+define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_blsmsk_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    blsmskq (%rsi), %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    blsmskq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_blsmsk_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
+; HASWELL-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
+; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_blsmsk_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
+; BROADWELL-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
+; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_blsmsk_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
+; SKYLAKE-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
+; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_blsmsk_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    blsmskq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    blsmskq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_blsmsk_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    blsmskq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT:    blsmskq %rdi, %rax # sched: [2:0.25]
+; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a1
+  %2 = sub i64 %1, 1
+  %3 = sub i64 %a0, 1
+  %4 = xor i64 %1, %2
+  %5 = xor i64 %a0, %3
+  %6 = add i64 %4, %5
+  ret i64 %6
+}
+
+define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_blsr_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    blsrl (%rsi), %ecx # sched: [5:0.50]
+; GENERIC-NEXT:    blsrl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_blsr_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
+; HASWELL-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
+; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_blsr_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
+; BROADWELL-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
+; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_blsr_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
+; SKYLAKE-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
+; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_blsr_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    blsrl (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    blsrl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_blsr_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    blsrl (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT:    blsrl %edi, %eax # sched: [2:0.25]
+; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a1
+  %2 = sub i32 %1, 1
+  %3 = sub i32 %a0, 1
+  %4 = and i32 %1, %2
+  %5 = and i32 %a0, %3
+  %6 = add i32 %4, %5
+  ret i32 %6
+}
+
+define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_blsr_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    blsrq (%rsi), %rcx # sched: [5:0.50]
+; GENERIC-NEXT:    blsrq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_blsr_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
+; HASWELL-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
+; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_blsr_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
+; BROADWELL-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
+; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_blsr_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
+; SKYLAKE-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
+; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_blsr_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    blsrq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    blsrq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_blsr_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    blsrq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT:    blsrq %rdi, %rax # sched: [2:0.25]
+; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a1
+  %2 = sub i64 %1, 1
+  %3 = sub i64 %a0, 1
+  %4 = and i64 %1, %2
+  %5 = and i64 %a0, %3
+  %6 = add i64 %4, %5
+  ret i64 %6
+}
+
+define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
+; GENERIC-LABEL: test_cttz_i16:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
+; GENERIC-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_cttz_i16:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
+; HASWELL-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_cttz_i16:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
+; BROADWELL-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_cttz_i16:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    tzcntw (%rsi), %cx # sched: [8:1.00]
+; SKYLAKE-NEXT:    tzcntw %di, %ax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_cttz_i16:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    tzcntw (%rsi), %cx # sched: [5:1.00]
+; BTVER2-NEXT:    tzcntw %di, %ax # sched: [2:1.00]
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_cttz_i16:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    tzcntw (%rsi), %cx # sched: [6:0.50]
+; ZNVER1-NEXT:    tzcntw %di, %ax # sched: [2:0.25]
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i16, i16 *%a1
+  %2 = tail call i16 @llvm.cttz.i16( i16 %1, i1 false )
+  %3 = tail call i16 @llvm.cttz.i16( i16 %a0, i1 false )
+  %4 = or i16 %2, %3
+  ret i16 %4
+}
+declare i16 @llvm.cttz.i16(i16, i1)
+
+define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_cttz_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
+; GENERIC-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_cttz_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
+; HASWELL-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_cttz_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
+; BROADWELL-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_cttz_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    tzcntl (%rsi), %ecx # sched: [8:1.00]
+; SKYLAKE-NEXT:    tzcntl %edi, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_cttz_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    tzcntl (%rsi), %ecx # sched: [5:1.00]
+; BTVER2-NEXT:    tzcntl %edi, %eax # sched: [2:1.00]
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_cttz_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    tzcntl (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT:    tzcntl %edi, %eax # sched: [2:0.25]
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a1
+  %2 = tail call i32 @llvm.cttz.i32( i32 %1, i1 false )
+  %3 = tail call i32 @llvm.cttz.i32( i32 %a0, i1 false )
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_cttz_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
+; GENERIC-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_cttz_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
+; HASWELL-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
+; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_cttz_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
+; BROADWELL-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
+; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_cttz_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    tzcntq (%rsi), %rcx # sched: [8:1.00]
+; SKYLAKE-NEXT:    tzcntq %rdi, %rax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_cttz_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    tzcntq (%rsi), %rcx # sched: [5:1.00]
+; BTVER2-NEXT:    tzcntq %rdi, %rax # sched: [2:1.00]
+; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_cttz_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    tzcntq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT:    tzcntq %rdi, %rax # sched: [2:0.25]
+; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a1
+  %2 = tail call i64 @llvm.cttz.i64( i64 %1, i1 false )
+  %3 = tail call i64 @llvm.cttz.i64( i64 %a0, i1 false )
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+declare i64 @llvm.cttz.i64(i64, i1)

Modified: llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll Mon Mar 26 11:19:28 2018
@@ -1,164 +1,164 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell   | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake   | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl       | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2    | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1    | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
-; GENERIC-LABEL: test_ctlz_i16:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    lzcntw (%rsi), %cx # sched: [7:1.00]
-; GENERIC-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctlz_i16:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    lzcntw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctlz_i16:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    lzcntw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctlz_i16:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    lzcntw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_ctlz_i16:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    lzcntw (%rsi), %cx # sched: [4:1.00]
-; BTVER2-NEXT:    lzcntw %di, %ax # sched: [1:0.50]
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctlz_i16:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    lzcntw (%rsi), %cx # sched: [6:0.50]
-; ZNVER1-NEXT:    lzcntw %di, %ax # sched: [2:0.25]
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i16, i16 *%a1
-  %2 = tail call i16 @llvm.ctlz.i16( i16 %1, i1 false )
-  %3 = tail call i16 @llvm.ctlz.i16( i16 %a0, i1 false )
-  %4 = or i16 %2, %3
-  ret i16 %4
-}
-declare i16 @llvm.ctlz.i16(i16, i1)
-
-define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_ctlz_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    lzcntl (%rsi), %ecx # sched: [7:1.00]
-; GENERIC-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctlz_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    lzcntl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctlz_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    lzcntl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctlz_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    lzcntl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_ctlz_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    lzcntl (%rsi), %ecx # sched: [4:1.00]
-; BTVER2-NEXT:    lzcntl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctlz_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    lzcntl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT:    lzcntl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a1
-  %2 = tail call i32 @llvm.ctlz.i32( i32 %1, i1 false )
-  %3 = tail call i32 @llvm.ctlz.i32( i32 %a0, i1 false )
-  %4 = or i32 %2, %3
-  ret i32 %4
-}
-declare i32 @llvm.ctlz.i32(i32, i1)
-
-define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_ctlz_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    lzcntq (%rsi), %rcx # sched: [7:1.00]
-; GENERIC-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctlz_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    lzcntq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctlz_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    lzcntq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctlz_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    lzcntq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_ctlz_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    lzcntq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT:    lzcntq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctlz_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    lzcntq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT:    lzcntq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a1
-  %2 = tail call i64 @llvm.ctlz.i64( i64 %1, i1 false )
-  %3 = tail call i64 @llvm.ctlz.i64( i64 %a0, i1 false )
-  %4 = or i64 %2, %3
-  ret i64 %4
-}
-declare i64 @llvm.ctlz.i64(i64, i1)
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell   | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake   | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl       | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2    | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1    | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+
+define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
+; GENERIC-LABEL: test_ctlz_i16:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    lzcntw (%rsi), %cx # sched: [8:1.00]
+; GENERIC-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_ctlz_i16:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    lzcntw (%rsi), %cx # sched: [8:1.00]
+; HASWELL-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_ctlz_i16:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    lzcntw (%rsi), %cx # sched: [8:1.00]
+; BROADWELL-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_ctlz_i16:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    lzcntw (%rsi), %cx # sched: [8:1.00]
+; SKYLAKE-NEXT:    lzcntw %di, %ax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_ctlz_i16:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    lzcntw (%rsi), %cx # sched: [4:1.00]
+; BTVER2-NEXT:    lzcntw %di, %ax # sched: [1:0.50]
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_ctlz_i16:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    lzcntw (%rsi), %cx # sched: [6:0.50]
+; ZNVER1-NEXT:    lzcntw %di, %ax # sched: [2:0.25]
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i16, i16 *%a1
+  %2 = tail call i16 @llvm.ctlz.i16( i16 %1, i1 false )
+  %3 = tail call i16 @llvm.ctlz.i16( i16 %a0, i1 false )
+  %4 = or i16 %2, %3
+  ret i16 %4
+}
+declare i16 @llvm.ctlz.i16(i16, i1)
+
+define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_ctlz_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    lzcntl (%rsi), %ecx # sched: [8:1.00]
+; GENERIC-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_ctlz_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    lzcntl (%rsi), %ecx # sched: [8:1.00]
+; HASWELL-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_ctlz_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    lzcntl (%rsi), %ecx # sched: [8:1.00]
+; BROADWELL-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_ctlz_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    lzcntl (%rsi), %ecx # sched: [8:1.00]
+; SKYLAKE-NEXT:    lzcntl %edi, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_ctlz_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    lzcntl (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    lzcntl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_ctlz_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    lzcntl (%rsi), %ecx # sched: [6:0.50]
+; ZNVER1-NEXT:    lzcntl %edi, %eax # sched: [2:0.25]
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a1
+  %2 = tail call i32 @llvm.ctlz.i32( i32 %1, i1 false )
+  %3 = tail call i32 @llvm.ctlz.i32( i32 %a0, i1 false )
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_ctlz_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    lzcntq (%rsi), %rcx # sched: [8:1.00]
+; GENERIC-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_ctlz_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    lzcntq (%rsi), %rcx # sched: [8:1.00]
+; HASWELL-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
+; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_ctlz_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    lzcntq (%rsi), %rcx # sched: [8:1.00]
+; BROADWELL-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
+; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_ctlz_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    lzcntq (%rsi), %rcx # sched: [8:1.00]
+; SKYLAKE-NEXT:    lzcntq %rdi, %rax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_ctlz_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    lzcntq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    lzcntq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_ctlz_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    lzcntq (%rsi), %rcx # sched: [6:0.50]
+; ZNVER1-NEXT:    lzcntq %rdi, %rax # sched: [2:0.25]
+; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a1
+  %2 = tail call i64 @llvm.ctlz.i64( i64 %1, i1 false )
+  %3 = tail call i64 @llvm.ctlz.i64( i64 %a0, i1 false )
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+declare i64 @llvm.ctlz.i64(i64, i1)

Modified: llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll?rev=328566&r1=328565&r2=328566&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll Mon Mar 26 11:19:28 2018
@@ -1,212 +1,212 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm         | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont    | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge   | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell   | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake     | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl         | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2      | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1      | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
-; GENERIC-LABEL: test_ctpop_i16:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    popcntw (%rsi), %cx # sched: [9:1.00]
-; GENERIC-NEXT:    popcntw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_ctpop_i16:
-; SLM:       # %bb.0:
-; SLM-NEXT:    popcntw (%rsi), %cx # sched: [6:1.00]
-; SLM-NEXT:    popcntw %di, %ax # sched: [3:1.00]
-; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT:    # kill: def $ax killed $ax killed $eax
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ctpop_i16:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    popcntw (%rsi), %cx # sched: [9:1.00]
-; SANDY-NEXT:    popcntw %di, %ax # sched: [3:1.00]
-; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    # kill: def $ax killed $ax killed $eax
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctpop_i16:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    popcntw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT:    popcntw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctpop_i16:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    popcntw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT:    popcntw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctpop_i16:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    popcntw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT:    popcntw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_ctpop_i16:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    popcntw (%rsi), %cx # sched: [4:1.00]
-; BTVER2-NEXT:    popcntw %di, %ax # sched: [1:0.50]
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctpop_i16:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    popcntw (%rsi), %cx # sched: [10:1.00]
-; ZNVER1-NEXT:    popcntw %di, %ax # sched: [3:1.00]
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i16, i16 *%a1
-  %2 = tail call i16 @llvm.ctpop.i16( i16 %1 )
-  %3 = tail call i16 @llvm.ctpop.i16( i16 %a0 )
-  %4 = or i16 %2, %3
-  ret i16 %4
-}
-declare i16 @llvm.ctpop.i16(i16)
-
-define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_ctpop_i32:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    popcntl (%rsi), %ecx # sched: [9:1.00]
-; GENERIC-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_ctpop_i32:
-; SLM:       # %bb.0:
-; SLM-NEXT:    popcntl (%rsi), %ecx # sched: [6:1.00]
-; SLM-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
-; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ctpop_i32:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    popcntl (%rsi), %ecx # sched: [9:1.00]
-; SANDY-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
-; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctpop_i32:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    popcntl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctpop_i32:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    popcntl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctpop_i32:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    popcntl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_ctpop_i32:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    popcntl (%rsi), %ecx # sched: [4:1.00]
-; BTVER2-NEXT:    popcntl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctpop_i32:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    popcntl (%rsi), %ecx # sched: [10:1.00]
-; ZNVER1-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
-; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i32, i32 *%a1
-  %2 = tail call i32 @llvm.ctpop.i32( i32 %1 )
-  %3 = tail call i32 @llvm.ctpop.i32( i32 %a0 )
-  %4 = or i32 %2, %3
-  ret i32 %4
-}
-declare i32 @llvm.ctpop.i32(i32)
-
-define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_ctpop_i64:
-; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    popcntq (%rsi), %rcx # sched: [9:1.00]
-; GENERIC-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT:    retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_ctpop_i64:
-; SLM:       # %bb.0:
-; SLM-NEXT:    popcntq (%rsi), %rcx # sched: [6:1.00]
-; SLM-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
-; SLM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT:    retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ctpop_i64:
-; SANDY:       # %bb.0:
-; SANDY-NEXT:    popcntq (%rsi), %rcx # sched: [9:1.00]
-; SANDY-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
-; SANDY-NEXT:    orq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctpop_i64:
-; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    popcntq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT:    retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctpop_i64:
-; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    popcntq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctpop_i64:
-; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    popcntq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT:    retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_ctpop_i64:
-; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    popcntq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT:    popcntq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctpop_i64:
-; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    popcntq (%rsi), %rcx # sched: [10:1.00]
-; ZNVER1-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
-; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = load i64, i64 *%a1
-  %2 = tail call i64 @llvm.ctpop.i64( i64 %1 )
-  %3 = tail call i64 @llvm.ctpop.i64( i64 %a0 )
-  %4 = or i64 %2, %3
-  ret i64 %4
-}
-declare i64 @llvm.ctpop.i64(i64)
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm         | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont    | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge   | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell     | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell   | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake     | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl         | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2      | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1      | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+
+define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
+; GENERIC-LABEL: test_ctpop_i16:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    popcntw (%rsi), %cx # sched: [9:1.00]
+; GENERIC-NEXT:    popcntw %di, %ax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; SLM-LABEL: test_ctpop_i16:
+; SLM:       # %bb.0:
+; SLM-NEXT:    popcntw (%rsi), %cx # sched: [6:1.00]
+; SLM-NEXT:    popcntw %di, %ax # sched: [3:1.00]
+; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; SLM-NEXT:    # kill: def $ax killed $ax killed $eax
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_ctpop_i16:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    popcntw (%rsi), %cx # sched: [9:1.00]
+; SANDY-NEXT:    popcntw %di, %ax # sched: [3:1.00]
+; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; SANDY-NEXT:    # kill: def $ax killed $ax killed $eax
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_ctpop_i16:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    popcntw (%rsi), %cx # sched: [8:1.00]
+; HASWELL-NEXT:    popcntw %di, %ax # sched: [3:1.00]
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_ctpop_i16:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    popcntw (%rsi), %cx # sched: [8:1.00]
+; BROADWELL-NEXT:    popcntw %di, %ax # sched: [3:1.00]
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_ctpop_i16:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    popcntw (%rsi), %cx # sched: [8:1.00]
+; SKYLAKE-NEXT:    popcntw %di, %ax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_ctpop_i16:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    popcntw (%rsi), %cx # sched: [4:1.00]
+; BTVER2-NEXT:    popcntw %di, %ax # sched: [1:0.50]
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_ctpop_i16:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    popcntw (%rsi), %cx # sched: [5:0.50]
+; ZNVER1-NEXT:    popcntw %di, %ax # sched: [1:0.25]
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i16, i16 *%a1
+  %2 = tail call i16 @llvm.ctpop.i16( i16 %1 )
+  %3 = tail call i16 @llvm.ctpop.i16( i16 %a0 )
+  %4 = or i16 %2, %3
+  ret i16 %4
+}
+declare i16 @llvm.ctpop.i16(i16)
+
+define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
+; GENERIC-LABEL: test_ctpop_i32:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    popcntl (%rsi), %ecx # sched: [9:1.00]
+; GENERIC-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; SLM-LABEL: test_ctpop_i32:
+; SLM:       # %bb.0:
+; SLM-NEXT:    popcntl (%rsi), %ecx # sched: [6:1.00]
+; SLM-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
+; SLM-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_ctpop_i32:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    popcntl (%rsi), %ecx # sched: [9:1.00]
+; SANDY-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
+; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_ctpop_i32:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    popcntl (%rsi), %ecx # sched: [8:1.00]
+; HASWELL-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_ctpop_i32:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    popcntl (%rsi), %ecx # sched: [8:1.00]
+; BROADWELL-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_ctpop_i32:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    popcntl (%rsi), %ecx # sched: [8:1.00]
+; SKYLAKE-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_ctpop_i32:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    popcntl (%rsi), %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    popcntl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    orl %ecx, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_ctpop_i32:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    popcntl (%rsi), %ecx # sched: [5:0.50]
+; ZNVER1-NEXT:    popcntl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i32, i32 *%a1
+  %2 = tail call i32 @llvm.ctpop.i32( i32 %1 )
+  %3 = tail call i32 @llvm.ctpop.i32( i32 %a0 )
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+declare i32 @llvm.ctpop.i32(i32)
+
+define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
+; GENERIC-LABEL: test_ctpop_i64:
+; GENERIC:       # %bb.0:
+; GENERIC-NEXT:    popcntq (%rsi), %rcx # sched: [9:1.00]
+; GENERIC-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    retq # sched: [1:1.00]
+;
+; SLM-LABEL: test_ctpop_i64:
+; SLM:       # %bb.0:
+; SLM-NEXT:    popcntq (%rsi), %rcx # sched: [6:1.00]
+; SLM-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
+; SLM-NEXT:    orq %rcx, %rax # sched: [1:0.50]
+; SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SANDY-LABEL: test_ctpop_i64:
+; SANDY:       # %bb.0:
+; SANDY-NEXT:    popcntq (%rsi), %rcx # sched: [9:1.00]
+; SANDY-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
+; SANDY-NEXT:    orq %rcx, %rax # sched: [1:0.33]
+; SANDY-NEXT:    retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_ctpop_i64:
+; HASWELL:       # %bb.0:
+; HASWELL-NEXT:    popcntq (%rsi), %rcx # sched: [8:1.00]
+; HASWELL-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
+; HASWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    retq # sched: [7:1.00]
+;
+; BROADWELL-LABEL: test_ctpop_i64:
+; BROADWELL:       # %bb.0:
+; BROADWELL-NEXT:    popcntq (%rsi), %rcx # sched: [8:1.00]
+; BROADWELL-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
+; BROADWELL-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_ctpop_i64:
+; SKYLAKE:       # %bb.0:
+; SKYLAKE-NEXT:    popcntq (%rsi), %rcx # sched: [8:1.00]
+; SKYLAKE-NEXT:    popcntq %rdi, %rax # sched: [3:1.00]
+; SKYLAKE-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_ctpop_i64:
+; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    popcntq (%rsi), %rcx # sched: [4:1.00]
+; BTVER2-NEXT:    popcntq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    orq %rcx, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_ctpop_i64:
+; ZNVER1:       # %bb.0:
+; ZNVER1-NEXT:    popcntq (%rsi), %rcx # sched: [5:0.50]
+; ZNVER1-NEXT:    popcntq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    orq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    retq # sched: [1:0.50]
+  %1 = load i64, i64 *%a1
+  %2 = tail call i64 @llvm.ctpop.i64( i64 %1 )
+  %3 = tail call i64 @llvm.ctpop.i64( i64 %a0 )
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+declare i64 @llvm.ctpop.i64(i64)




More information about the llvm-commits mailing list