[llvm] r334303 - [X86][BtVer2] Add support for all SUB/XOR 32/64 scalar instructions that should match the dependency-breaking 'zero-idiom'

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 8 10:00:46 PDT 2018


Author: rksimon
Date: Fri Jun  8 10:00:45 2018
New Revision: 334303

URL: http://llvm.org/viewvc/llvm-project?rev=334303&view=rev
Log:
[X86][BtVer2] Add support for all SUB/XOR 32/64 scalar instructions that should match the dependency-breaking 'zero-idiom'

As detailed in Agner Fog's microarchitecture document (section 21.8, "AMD Bobcat and Jaguar pipeline - Dependency-breaking instructions"), these instructions are dependency-breaking: they take a fast path that zeroes the destination register (and sets the appropriate EFLAGS bits).

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=334303&r1=334302&r2=334303&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri Jun  8 10:00:45 2018
@@ -555,7 +555,7 @@ def JWriteZeroLatency : SchedWriteRes<[]
   let Latency = 0;
 }
 
-// Certain vector instructions that use the same register for both source 
+// Certain instructions that use the same register for both source 
 // operands do not have a real dependency on the previous contents of the
 // register, and thus, do not have to wait before completing. They can be
 // optimized out at register renaming stage.
@@ -564,6 +564,13 @@ def JWriteZeroLatency : SchedWriteRes<[]
 // Reference: Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
 // Section 21.8 [Dependency-breaking instructions].
 
+def JWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
+    SchedVar<MCSchedPredicate<TruePred>,           [WriteALU]>
+]>;
+def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                        XOR32rr, XOR64rr)>;
+
 def JWriteFZeroIdiom : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
     SchedVar<MCSchedPredicate<TruePred>,           [WriteFLogic]>

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=334303&r1=334302&r2=334303&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Fri Jun  8 10:00:45 2018
@@ -4691,7 +4691,7 @@ define i32 @test_testpd(<2 x double> %a0
 ;
 ; BTVER2-LABEL: test_testpd:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    xorl %eax, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
 ; BTVER2-NEXT:    vtestpd %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
 ; BTVER2-NEXT:    vtestpd (%rdi), %xmm0 # sched: [8:1.00]
@@ -4777,7 +4777,7 @@ define i32 @test_testpd_ymm(<4 x double>
 ;
 ; BTVER2-LABEL: test_testpd_ymm:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    xorl %eax, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
 ; BTVER2-NEXT:    vtestpd %ymm1, %ymm0 # sched: [4:2.00]
 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
 ; BTVER2-NEXT:    vtestpd (%rdi), %ymm0 # sched: [9:2.00]
@@ -4858,7 +4858,7 @@ define i32 @test_testps(<4 x float> %a0,
 ;
 ; BTVER2-LABEL: test_testps:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    xorl %eax, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
 ; BTVER2-NEXT:    vtestps %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
 ; BTVER2-NEXT:    vtestps (%rdi), %xmm0 # sched: [8:1.00]
@@ -4944,7 +4944,7 @@ define i32 @test_testps_ymm(<8 x float>
 ;
 ; BTVER2-LABEL: test_testps_ymm:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    xorl %eax, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
 ; BTVER2-NEXT:    vtestps %ymm1, %ymm0 # sched: [4:2.00]
 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
 ; BTVER2-NEXT:    vtestps (%rdi), %ymm0 # sched: [9:2.00]

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s?rev=334303&r1=334302&r2=334303&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s Fri Jun  8 10:00:45 2018
@@ -67,9 +67,9 @@ vpxor  %xmm3, %xmm3, %xmm5
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      55
-# CHECK-NEXT: Total Cycles:      32
+# CHECK-NEXT: Total Cycles:      29
 # CHECK-NEXT: Dispatch Width:    2
-# CHECK-NEXT: IPC:               1.72
+# CHECK-NEXT: IPC:               1.90
 # CHECK-NEXT: Block RThroughput: 27.5
 
 # CHECK:      Instruction Info:
@@ -81,10 +81,10 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: [6]: HasSideEffects
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.50                        subl	%eax, %eax
-# CHECK-NEXT:  1      1     0.50                        subq	%rax, %rax
-# CHECK-NEXT:  1      1     0.50                        xorl	%eax, %eax
-# CHECK-NEXT:  1      1     0.50                        xorq	%rax, %rax
+# CHECK-NEXT:  1      0     0.50                        subl	%eax, %eax
+# CHECK-NEXT:  1      0     0.50                        subq	%rax, %rax
+# CHECK-NEXT:  1      0     0.50                        xorl	%eax, %eax
+# CHECK-NEXT:  1      0     0.50                        xorq	%rax, %rax
 # CHECK-NEXT:  1      0     0.50                        pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  1      0     0.50                        pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  1      0     0.50                        pcmpgtw	%mm2, %mm2
@@ -138,8 +138,8 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  1      0     0.50                        vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Register File statistics:
-# CHECK-NEXT: Total number of mappings created:    8
-# CHECK-NEXT: Max number of mappings used:         8
+# CHECK-NEXT: Total number of mappings created:    0
+# CHECK-NEXT: Max number of mappings used:         0
 
 # CHECK:      *  Register File #1 -- JFpuPRF:
 # CHECK-NEXT:    Number of physical registers:     72
@@ -148,8 +148,8 @@ vpxor  %xmm3, %xmm3, %xmm5
 
 # CHECK:      *  Register File #2 -- JIntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     64
-# CHECK-NEXT:    Total number of mappings created: 8
-# CHECK-NEXT:    Max number of mappings used:      8
+# CHECK-NEXT:    Total number of mappings created: 0
+# CHECK-NEXT:    Max number of mappings used:      0
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - JALU0
@@ -169,14 +169,14 @@ vpxor  %xmm3, %xmm3, %xmm5
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
-# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
-# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
-# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
-# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     pcmpgtw	%mm2, %mm2
@@ -230,64 +230,64 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789          01
-# CHECK-NEXT: Index     0123456789          0123456789
+# CHECK-NEXT:                     0123456789
+# CHECK-NEXT: Index     0123456789          012345678
 
-# CHECK:      [0,0]     DeER .    .    .    .    .    ..   subl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    ..   subq	%rax, %rax
-# CHECK-NEXT: [0,2]     .D=eER    .    .    .    .    ..   xorl	%eax, %eax
-# CHECK-NEXT: [0,3]     .D==eER   .    .    .    .    ..   xorq	%rax, %rax
-# CHECK-NEXT: [0,4]     . D---R   .    .    .    .    ..   pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: [0,5]     . D----R  .    .    .    .    ..   pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: [0,6]     .  D---R  .    .    .    .    ..   pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: [0,7]     .  D----R .    .    .    .    ..   pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: [0,8]     .   D---R .    .    .    .    ..   pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: [0,9]     .   D----R.    .    .    .    ..   pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: [0,10]    .    D---R.    .    .    .    ..   pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: [0,11]    .    D----R    .    .    .    ..   vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12]    .    .D---R    .    .    .    ..   vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13]    .    .D----R   .    .    .    ..   vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14]    .    . D---R   .    .    .    ..   vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15]    .    . D----R  .    .    .    ..   vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16]    .    .  D---R  .    .    .    ..   vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17]    .    .  D----R .    .    .    ..   vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18]    .    .   D---R .    .    .    ..   vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19]    .    .   D----R.    .    .    ..   psubb	%mm2, %mm2
-# CHECK-NEXT: [0,20]    .    .    D---R.    .    .    ..   psubd	%mm2, %mm2
-# CHECK-NEXT: [0,21]    .    .    D----R    .    .    ..   psubq	%mm2, %mm2
-# CHECK-NEXT: [0,22]    .    .    .D---R    .    .    ..   psubw	%mm2, %mm2
-# CHECK-NEXT: [0,23]    .    .    .D----R   .    .    ..   psubb	%xmm2, %xmm2
-# CHECK-NEXT: [0,24]    .    .    . D---R   .    .    ..   psubd	%xmm2, %xmm2
-# CHECK-NEXT: [0,25]    .    .    . D----R  .    .    ..   psubq	%xmm2, %xmm2
-# CHECK-NEXT: [0,26]    .    .    .  D---R  .    .    ..   psubw	%xmm2, %xmm2
-# CHECK-NEXT: [0,27]    .    .    .  D----R .    .    ..   vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,28]    .    .    .   D---R .    .    ..   vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,29]    .    .    .   D----R.    .    ..   vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,30]    .    .    .    D---R.    .    ..   vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,31]    .    .    .    D----R    .    ..   vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,32]    .    .    .    .D---R    .    ..   vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,33]    .    .    .    .D----R   .    ..   vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,34]    .    .    .    . D---R   .    ..   vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,35]    .    .    .    . D----R  .    ..   andnps	%xmm0, %xmm0
-# CHECK-NEXT: [0,36]    .    .    .    .  D---R  .    ..   andnpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,37]    .    .    .    .  D----R .    ..   vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,38]    .    .    .    .   D---R .    ..   vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,39]    .    .    .    .   D----R.    ..   pandn	%mm2, %mm2
-# CHECK-NEXT: [0,40]    .    .    .    .    D---R.    ..   pandn	%xmm2, %xmm2
-# CHECK-NEXT: [0,41]    .    .    .    .    D----R    ..   vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,42]    .    .    .    .    .D---R    ..   vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,43]    .    .    .    .    .D----R   ..   vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,44]    .    .    .    .    . D---R   ..   vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45]    .    .    .    .    . D----R  ..   xorps	%xmm0, %xmm0
-# CHECK-NEXT: [0,46]    .    .    .    .    .  D---R  ..   xorpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,47]    .    .    .    .    .  D----R ..   vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,48]    .    .    .    .    .   D---R ..   vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,49]    .    .    .    .    .   D----R..   pxor	%mm2, %mm2
-# CHECK-NEXT: [0,50]    .    .    .    .    .    D---R..   pxor	%xmm2, %xmm2
-# CHECK-NEXT: [0,51]    .    .    .    .    .    D----R.   vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,52]    .    .    .    .    .    .D---R.   vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,53]    .    .    .    .    .    .D----R   vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,54]    .    .    .    .    .    . D---R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK:      [0,0]     DR   .    .    .    .    .  .   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     DR   .    .    .    .    .  .   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     .DR  .    .    .    .    .  .   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     .DR  .    .    .    .    .  .   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     . DR .    .    .    .    .  .   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     . DR .    .    .    .    .  .   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .  DR.    .    .    .    .  .   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .  DR.    .    .    .    .  .   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     .   DR    .    .    .    .  .   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     .   DR    .    .    .    .  .   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    .    DR   .    .    .    .  .   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    .    DR   .    .    .    .  .   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    .    .DR  .    .    .    .  .   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    .    .DR  .    .    .    .  .   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    .    . DR .    .    .    .  .   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    .    . DR .    .    .    .  .   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    .    .  DR.    .    .    .  .   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    .    .  DR.    .    .    .  .   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .    .   DR    .    .    .  .   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .    .   DR    .    .    .  .   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,20]    .    .    DR   .    .    .  .   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,21]    .    .    DR   .    .    .  .   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,22]    .    .    .DR  .    .    .  .   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,23]    .    .    .DR  .    .    .  .   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,24]    .    .    . DR .    .    .  .   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,25]    .    .    . DR .    .    .  .   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,26]    .    .    .  DR.    .    .  .   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,27]    .    .    .  DR.    .    .  .   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,28]    .    .    .   DR    .    .  .   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,29]    .    .    .   DR    .    .  .   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,30]    .    .    .    DR   .    .  .   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,31]    .    .    .    DR   .    .  .   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,32]    .    .    .    .DR  .    .  .   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,33]    .    .    .    .DR  .    .  .   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,34]    .    .    .    . DR .    .  .   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,35]    .    .    .    . DR .    .  .   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,36]    .    .    .    .  DR.    .  .   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,37]    .    .    .    .  DR.    .  .   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,38]    .    .    .    .   DR    .  .   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,39]    .    .    .    .   DR    .  .   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,40]    .    .    .    .    DR   .  .   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,41]    .    .    .    .    DR   .  .   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,42]    .    .    .    .    .DR  .  .   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,43]    .    .    .    .    .DR  .  .   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,44]    .    .    .    .    . DR .  .   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45]    .    .    .    .    . DR .  .   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,46]    .    .    .    .    .  DR.  .   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,47]    .    .    .    .    .  DR.  .   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,48]    .    .    .    .    .   DR  .   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,49]    .    .    .    .    .   DR  .   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,50]    .    .    .    .    .    DR .   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,51]    .    .    .    .    .    DR .   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,52]    .    .    .    .    .    .DR.   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,53]    .    .    .    .    .    .DR.   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,54]    .    .    .    .    .    . DR   vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -296,58 +296,58 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
-# CHECK-NEXT: 2.     1     2.0    0.0    0.0       xorl	%eax, %eax
-# CHECK-NEXT: 3.     1     3.0    0.0    0.0       xorq	%rax, %rax
-# CHECK-NEXT: 4.     1     0.0    0.0    3.0       pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: 5.     1     0.0    0.0    4.0       pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: 6.     1     0.0    0.0    3.0       pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: 7.     1     0.0    0.0    4.0       pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: 8.     1     0.0    0.0    3.0       pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: 9.     1     0.0    0.0    4.0       pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: 10.    1     0.0    0.0    3.0       pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: 11.    1     0.0    0.0    4.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 12.    1     0.0    0.0    3.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 13.    1     0.0    0.0    4.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 14.    1     0.0    0.0    3.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 15.    1     0.0    0.0    4.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 16.    1     0.0    0.0    3.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 17.    1     0.0    0.0    4.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 18.    1     0.0    0.0    3.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 19.    1     0.0    0.0    4.0       psubb	%mm2, %mm2
-# CHECK-NEXT: 20.    1     0.0    0.0    3.0       psubd	%mm2, %mm2
-# CHECK-NEXT: 21.    1     0.0    0.0    4.0       psubq	%mm2, %mm2
-# CHECK-NEXT: 22.    1     0.0    0.0    3.0       psubw	%mm2, %mm2
-# CHECK-NEXT: 23.    1     0.0    0.0    4.0       psubb	%xmm2, %xmm2
-# CHECK-NEXT: 24.    1     0.0    0.0    3.0       psubd	%xmm2, %xmm2
-# CHECK-NEXT: 25.    1     0.0    0.0    4.0       psubq	%xmm2, %xmm2
-# CHECK-NEXT: 26.    1     0.0    0.0    3.0       psubw	%xmm2, %xmm2
-# CHECK-NEXT: 27.    1     0.0    0.0    4.0       vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 28.    1     0.0    0.0    3.0       vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 29.    1     0.0    0.0    4.0       vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 30.    1     0.0    0.0    3.0       vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 31.    1     0.0    0.0    4.0       vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 32.    1     0.0    0.0    3.0       vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 33.    1     0.0    0.0    4.0       vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 34.    1     0.0    0.0    3.0       vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 35.    1     0.0    0.0    4.0       andnps	%xmm0, %xmm0
-# CHECK-NEXT: 36.    1     0.0    0.0    3.0       andnpd	%xmm1, %xmm1
-# CHECK-NEXT: 37.    1     0.0    0.0    4.0       vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 38.    1     0.0    0.0    3.0       vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 39.    1     0.0    0.0    4.0       pandn	%mm2, %mm2
-# CHECK-NEXT: 40.    1     0.0    0.0    3.0       pandn	%xmm2, %xmm2
-# CHECK-NEXT: 41.    1     0.0    0.0    4.0       vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 42.    1     0.0    0.0    3.0       vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: 43.    1     0.0    0.0    4.0       vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: 44.    1     0.0    0.0    3.0       vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 45.    1     0.0    0.0    4.0       xorps	%xmm0, %xmm0
-# CHECK-NEXT: 46.    1     0.0    0.0    3.0       xorpd	%xmm1, %xmm1
-# CHECK-NEXT: 47.    1     0.0    0.0    4.0       vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 48.    1     0.0    0.0    3.0       vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 49.    1     0.0    0.0    4.0       pxor	%mm2, %mm2
-# CHECK-NEXT: 50.    1     0.0    0.0    3.0       pxor	%xmm2, %xmm2
-# CHECK-NEXT: 51.    1     0.0    0.0    4.0       vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 52.    1     0.0    0.0    3.0       vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 53.    1     0.0    0.0    4.0       vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 54.    1     0.0    0.0    3.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 0.     1     0.0    0.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     0.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     0.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     0.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     0.0    0.0    0.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     0.0    0.0    0.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 6.     1     0.0    0.0    0.0       pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: 7.     1     0.0    0.0    0.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     0.0    0.0    0.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     0.0    0.0    0.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     0.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     0.0    0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     0.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     0.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     0.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     0.0    0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     0.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     0.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     0.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     0.0    0.0    0.0       psubb	%mm2, %mm2
+# CHECK-NEXT: 20.    1     0.0    0.0    0.0       psubd	%mm2, %mm2
+# CHECK-NEXT: 21.    1     0.0    0.0    0.0       psubq	%mm2, %mm2
+# CHECK-NEXT: 22.    1     0.0    0.0    0.0       psubw	%mm2, %mm2
+# CHECK-NEXT: 23.    1     0.0    0.0    0.0       psubb	%xmm2, %xmm2
+# CHECK-NEXT: 24.    1     0.0    0.0    0.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 25.    1     0.0    0.0    0.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 26.    1     0.0    0.0    0.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 27.    1     0.0    0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 28.    1     0.0    0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 29.    1     0.0    0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 30.    1     0.0    0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 31.    1     0.0    0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 32.    1     0.0    0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 33.    1     0.0    0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 34.    1     0.0    0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 35.    1     0.0    0.0    0.0       andnps	%xmm0, %xmm0
+# CHECK-NEXT: 36.    1     0.0    0.0    0.0       andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 37.    1     0.0    0.0    0.0       vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 38.    1     0.0    0.0    0.0       vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 39.    1     0.0    0.0    0.0       pandn	%mm2, %mm2
+# CHECK-NEXT: 40.    1     0.0    0.0    0.0       pandn	%xmm2, %xmm2
+# CHECK-NEXT: 41.    1     0.0    0.0    0.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 42.    1     0.0    0.0    0.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 43.    1     0.0    0.0    0.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 44.    1     0.0    0.0    0.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45.    1     0.0    0.0    0.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 46.    1     0.0    0.0    0.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 47.    1     0.0    0.0    0.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 48.    1     0.0    0.0    0.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 49.    1     0.0    0.0    0.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 50.    1     0.0    0.0    0.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 51.    1     0.0    0.0    0.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 52.    1     0.0    0.0    0.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 53.    1     0.0    0.0    0.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 54.    1     0.0    0.0    0.0       vpxor	%xmm3, %xmm3, %xmm5




More information about the llvm-commits mailing list