[llvm] r328694 - [X86][BtVer2] Fix the number of micro opcodes for a bunch of YMM instructions.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 28 03:49:33 PDT 2018


Author: adibiagio
Date: Wed Mar 28 03:49:33 2018
New Revision: 328694

URL: http://llvm.org/viewvc/llvm-project?rev=328694&view=rev
Log:
[X86][BtVer2] Fix the number of micro opcodes for a bunch of YMM instructions.

The Jaguar backend natively supports 128-bit data types, so operations on
256-bit YMM registers are split into two COPs (complex operations). Each COP
consumes a slot in the dispatch group and in the reorder buffer.

The scheduling model for Jaguar should therefore set `NumMicroOps = 2` on the
scheduling classes (SchedWriteRes definitions) used by those instructions.

This was found when testing AVX code for BtVer2 using llvm-mca.

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=328694&r1=328693&r2=328694&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Wed Mar 28 03:49:33 2018
@@ -547,6 +547,7 @@ def : InstRW<[JWriteVDPPSYLd, ReadAfterL
 def JWriteFAddY: SchedWriteRes<[JFPU0, JFPA]> {
   let Latency = 3;
   let ResourceCycles = [2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr,
                                     VSUBPDYrr, VSUBPSYrr,
@@ -555,6 +556,7 @@ def : InstRW<[JWriteFAddY], (instrs VADD
 def JWriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
   let Latency = 8;
   let ResourceCycles = [2, 2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm,
                                                    VSUBPDYrm, VSUBPSYrm,
@@ -563,36 +565,42 @@ def : InstRW<[JWriteFAddYLd, ReadAfterLd
 def JWriteFDivY: SchedWriteRes<[JFPU1, JFPM]> {
   let Latency = 38;
   let ResourceCycles = [2, 38];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>;
 
 def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
   let Latency = 43;
   let ResourceCycles = [2, 2, 38];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>;
 
 def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> {
   let Latency = 4;
   let ResourceCycles = [2, 4];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVMULYPD], (instrs VMULPDYrr)>;
 
 def JWriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
   let Latency = 9;
   let ResourceCycles = [2, 2, 4];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>;
 
 def JWriteVMULYPS: SchedWriteRes<[JFPU1, JFPM]> {
   let Latency = 2;
   let ResourceCycles = [2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>;
 
 def JWriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
   let Latency = 7;
   let ResourceCycles = [2, 2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>;
 
@@ -611,6 +619,7 @@ def : InstRW<[JWriteVMULPDLd], (instrs M
 def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
   let Latency = 3;
   let ResourceCycles = [2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
                                     VCVTPS2DQYrr, VCVTTPS2DQYrr,
@@ -619,6 +628,7 @@ def : InstRW<[JWriteVCVTY], (instrs VCVT
 def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
   let Latency = 8;
   let ResourceCycles = [2, 2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
                                                    VCVTPS2DQYrm, VCVTTPS2DQYrm,
@@ -834,12 +844,14 @@ def : InstRW<[JWriteVSQRTYPDLd], (instrs
 def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> {
   let Latency = 42;
   let ResourceCycles = [2, 42];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>;
 
 def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
   let Latency = 47;
   let ResourceCycles = [2, 2, 42];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>;
 

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s?rev=328694&r1=328693&r2=328694&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s Wed Mar 28 03:49:33 2018
@@ -39,8 +39,8 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT:  1      2     1.00                    	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      3     1.00                    	vaddps	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      21    21.00                   	vsqrtps	%xmm0, %xmm2
-# CHECK-NEXT:  1      3     2.00                    	vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      42    42.00                   	vsqrtps	%ymm0, %ymm2
+# CHECK-NEXT:  2      3     2.00                    	vaddps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      42    42.00                   	vsqrtps	%ymm0, %ymm2
 
 
 # CHECK:      Resources:
@@ -87,13 +87,13 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT: [0,4]	. DeeeER  .    .    .    .    .    .    .    .    .    .    .    . .	vaddps	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT: [0,5]	. DeeeeeeeeeeeeeeeeeeeeeER    .    .    .    .    .    .    .    . .	vsqrtps	%xmm0, %xmm2
 # CHECK-NEXT: [0,6]	.  DeeeE-----------------R    .    .    .    .    .    .    .    . .	vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: [0,7]	.  D====================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER	vsqrtps	%ymm0, %ymm2
+# CHECK-NEXT: [0,7]	.   D===================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER	vsqrtps	%ymm0, %ymm2
 
-# CHECK:      [1,0]	.   D=eeE----------------------------------------------------------R	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,1]	.   DeE------------------------------------------------------------R	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,2]	.    DeeeE---------------------------------------------------------R	vcvttps2dq	%xmm0, %xmm2
-# CHECK-NEXT: [1,3]	.    D=eeE---------------------------------------------------------R	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: [1,4]	.    .D=eeeE-------------------------------------------------------R	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK:      [1,0]	.    DeeE----------------------------------------------------------R	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,1]	.    DeE-----------------------------------------------------------R	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,2]	.    .DeeeE--------------------------------------------------------R	vcvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT: [1,3]	.    .DeeE---------------------------------------------------------R	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: [1,4]	.    . DeeeE-------------------------------------------------------R	vaddps	%xmm0, %xmm1, %xmm2
 
 
 # CHECK:      Average Wait times (based on the timeline view):
@@ -103,11 +103,11 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     1.5    1.5    29.0 	vpmulld	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1.     2     1.0    1.0    30.5 	vpand	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2.     2     1.0    1.0    28.5 	vcvttps2dq	%xmm0, %xmm2
-# CHECK-NEXT: 3.     2     1.5    1.5    29.0 	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4.     2     1.5    1.5    27.5 	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.     2     1.0    1.0    29.0 	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1.     2     1.0    1.0    30.0 	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2.     2     1.0    1.0    28.0 	vcvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT: 3.     2     1.0    1.0    29.0 	vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4.     2     1.0    1.0    27.5 	vaddps	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 5.     1     1.0    1.0    0.0  	vsqrtps	%xmm0, %xmm2
 # CHECK-NEXT: 6.     1     1.0    1.0    17.0 	vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 7.     1     21.0   21.0   0.0  	vsqrtps	%ymm0, %ymm2
+# CHECK-NEXT: 7.     1     20.0   20.0   0.0  	vsqrtps	%ymm0, %ymm2

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=328694&r1=328693&r2=328694&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Wed Mar 28 03:49:33 2018
@@ -1008,6 +1008,701 @@ vxorps            (%rax), %ymm1, %ymm2
 vzeroall
 vzeroupper
 
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]	Instructions:
+# CHECK-NEXT:  1      3     1.00                    	vaddpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaddpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vaddpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vaddpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vaddps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaddps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vaddps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vaddps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vaddsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaddsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vaddss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaddss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vaddsubpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaddsubpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vaddsubpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vaddsubpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vaddsubps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaddsubps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vaddsubps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vaddsubps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vaesdec	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaesdec	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vaesdeclast	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaesdeclast	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vaesenc	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaesenc	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vaesenclast	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vaesenclast	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vaesimc	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vaesimc	(%rax), %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vaeskeygenassist	$22, %xmm0, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vaeskeygenassist	$22, (%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vandnpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vandnpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vandnpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vandnpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vandnps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vandnps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vandnps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vandnps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vandpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vandpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vandpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vandpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vandps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vandps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vandps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vandps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vblendpd	$11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vblendpd	$11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vblendpd	$11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vblendpd	$11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vblendps	$11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vblendps	$11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vblendps	$11, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vblendps	$11, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  3      2     2.00                    	vblendvpd	%xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      7     2.00    *               	vblendvpd	%xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  6      3     3.00                    	vblendvpd	%ymm3, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  6      8     3.00    *               	vblendvpd	%ymm3, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  3      2     2.00                    	vblendvps	%xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      7     2.00    *               	vblendvps	%xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  6      3     3.00                    	vblendvps	%ymm3, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  6      8     3.00    *               	vblendvps	%ymm3, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      6     1.00    *               	vbroadcastf128	(%rax), %ymm2
+# CHECK-NEXT:  1      6     2.00    *               	vbroadcastsd	(%rax), %ymm2
+# CHECK-NEXT:  1      6     1.00    *               	vbroadcastss	(%rax), %xmm2
+# CHECK-NEXT:  1      6     2.00    *               	vbroadcastss	(%rax), %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vcmppd	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vcmppd	$0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     2.00                    	vcmppd	$0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      7     2.00    *               	vcmppd	$0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vcmpps	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vcmpps	$0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     2.00                    	vcmpps	$0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      7     2.00    *               	vcmpps	$0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vcmpsd	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vcmpsd	$0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vcmpss	$0, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vcmpss	$0, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vcomisd	%xmm0, %xmm1
+# CHECK-NEXT:  1      8     1.00    *               	vcomisd	(%rax), %xmm1
+# CHECK-NEXT:  1      3     1.00                    	vcomiss	%xmm0, %xmm1
+# CHECK-NEXT:  1      8     1.00    *               	vcomiss	(%rax), %xmm1
+# CHECK-NEXT:  1      3     1.00                    	vcvtdq2pd	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vcvtdq2pd	%xmm0, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vcvtdq2pd	(%rax), %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vcvtdq2ps	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvtdq2ps	(%rax), %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vcvtdq2ps	%ymm0, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vcvtdq2ps	(%rax), %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vcvtpd2dq	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvtpd2dqx	(%rax), %xmm2
+# CHECK-NEXT:  1      6     2.00                    	vcvtpd2dq	%ymm0, %xmm2
+# CHECK-NEXT:  1      11    2.00    *               	vcvtpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vcvtpd2ps	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvtpd2psx	(%rax), %xmm2
+# CHECK-NEXT:  1      6     2.00                    	vcvtpd2ps	%ymm0, %xmm2
+# CHECK-NEXT:  1      11    2.00    *               	vcvtpd2psy	(%rax), %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vcvtps2dq	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvtps2dq	(%rax), %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vcvtps2dq	%ymm0, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vcvtps2dq	(%rax), %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vcvtps2pd	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvtps2pd	(%rax), %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vcvtps2pd	%xmm0, %ymm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvtps2pd	(%rax), %ymm2
+# CHECK-NEXT:  2      7     1.00                    	vcvtsd2si	%xmm0, %ecx
+# CHECK-NEXT:  2      7     1.00                    	vcvtsd2si	%xmm0, %rcx
+# CHECK-NEXT:  2      12    1.00    *               	vcvtsd2si	(%rax), %ecx
+# CHECK-NEXT:  2      12    1.00    *               	vcvtsd2si	(%rax), %rcx
+# CHECK-NEXT:  2      7     2.00                    	vcvtsd2ss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  2      12    2.00    *               	vcvtsd2ss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      9     1.00                    	vcvtsi2sdl	%ecx, %xmm0, %xmm2
+# CHECK-NEXT:  2      9     1.00                    	vcvtsi2sdq	%rcx, %xmm0, %xmm2
+# CHECK-NEXT:  2      14    1.00    *               	vcvtsi2sdl	(%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      14    1.00    *               	vcvtsi2sdq	(%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      9     1.00                    	vcvtsi2ssl	%ecx, %xmm0, %xmm2
+# CHECK-NEXT:  2      9     1.00                    	vcvtsi2ssq	%rcx, %xmm0, %xmm2
+# CHECK-NEXT:  2      14    1.00    *               	vcvtsi2ssl	(%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      14    1.00    *               	vcvtsi2ssq	(%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      7     2.00                    	vcvtss2sd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  2      12    2.00    *               	vcvtss2sd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      7     1.00                    	vcvtss2si	%xmm0, %ecx
+# CHECK-NEXT:  2      7     1.00                    	vcvtss2si	%xmm0, %rcx
+# CHECK-NEXT:  2      12    1.00    *               	vcvtss2si	(%rax), %ecx
+# CHECK-NEXT:  2      12    1.00    *               	vcvtss2si	(%rax), %rcx
+# CHECK-NEXT:  1      3     1.00                    	vcvttpd2dq	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvttpd2dqx	(%rax), %xmm2
+# CHECK-NEXT:  1      6     2.00                    	vcvttpd2dq	%ymm0, %xmm2
+# CHECK-NEXT:  1      11    2.00    *               	vcvttpd2dqy	(%rax), %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vcvttps2dq	%xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vcvttps2dq	(%rax), %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vcvttps2dq	%ymm0, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vcvttps2dq	(%rax), %ymm2
+# CHECK-NEXT:  2      7     1.00                    	vcvttsd2si	%xmm0, %ecx
+# CHECK-NEXT:  2      7     1.00                    	vcvttsd2si	%xmm0, %rcx
+# CHECK-NEXT:  2      12    1.00    *               	vcvttsd2si	(%rax), %ecx
+# CHECK-NEXT:  2      12    1.00    *               	vcvttsd2si	(%rax), %rcx
+# CHECK-NEXT:  2      7     1.00                    	vcvttss2si	%xmm0, %ecx
+# CHECK-NEXT:  2      7     1.00                    	vcvttss2si	%xmm0, %rcx
+# CHECK-NEXT:  2      12    1.00    *               	vcvttss2si	(%rax), %ecx
+# CHECK-NEXT:  2      12    1.00    *               	vcvttss2si	(%rax), %rcx
+# CHECK-NEXT:  1      19    19.00                   	vdivpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      24    19.00   *               	vdivpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      38    38.00                   	vdivpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      43    38.00   *               	vdivpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      19    19.00                   	vdivps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      24    19.00   *               	vdivps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      38    38.00                   	vdivps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      43    38.00   *               	vdivps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      19    19.00                   	vdivsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      24    19.00   *               	vdivsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      19    19.00                   	vdivss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      24    19.00   *               	vdivss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  3      9     3.00                    	vdppd	$22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      14    3.00    *               	vdppd	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  5      11    3.00                    	vdpps	$22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  5      16    3.00    *               	vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  10     12    6.00                    	vdpps	$22, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  10     17    6.00    *               	vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vextractf128	$1, %ymm0, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vextractf128	$1, %ymm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  2      6     1.00           *        	vextractps	$1, %xmm0, (%rax)
+# CHECK-NEXT:  1      3     1.00                    	vhaddpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vhaddpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     2.00                    	vhaddpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      8     2.00    *               	vhaddpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vhaddps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vhaddps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     2.00                    	vhaddps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      8     2.00    *               	vhaddps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vhsubpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vhsubpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     2.00                    	vhsubpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      8     2.00    *               	vhsubpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vhsubps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vhsubps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     2.00                    	vhsubps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      8     2.00    *               	vhsubps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vinsertf128	$1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      6     1.00    *               	vinsertf128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vinsertps	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vinsertps	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      5     1.00    *               	vlddqu	(%rax), %xmm2
+# CHECK-NEXT:  1      5     1.00    *               	vlddqu	(%rax), %ymm2
+# CHECK-NEXT:  1      5     1.00    *      *      * 	vldmxcsr	(%rax)
+# CHECK-NEXT:  1      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  1      6     1.00    *               	vmaskmovpd	(%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      6     2.00    *               	vmaskmovpd	(%rax), %ymm0, %ymm2
+# CHECK-NEXT:  1      6     2.00    *      *        	vmaskmovpd	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  2      6     2.00    *      *        	vmaskmovpd	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  1      6     1.00    *               	vmaskmovps	(%rax), %xmm0, %xmm2
+# CHECK-NEXT:  2      6     2.00    *               	vmaskmovps	(%rax), %ymm0, %ymm2
+# CHECK-NEXT:  1      6     2.00    *      *        	vmaskmovps	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  2      6     2.00    *      *        	vmaskmovps	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  1      2     1.00                    	vmaxpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vmaxpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     2.00                    	vmaxpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      7     2.00    *               	vmaxpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vmaxps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vmaxps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     2.00                    	vmaxps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      7     2.00    *               	vmaxps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vmaxsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vmaxsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vmaxss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vmaxss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vminpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vminpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     2.00                    	vminpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      7     2.00    *               	vminpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vminps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vminps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     2.00                    	vminps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      7     2.00    *               	vminps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vminsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vminsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vminss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vminss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovapd	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovapd	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovapd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovapd	%ymm0, %ymm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovapd	%ymm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovapd	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovaps	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovaps	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovaps	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovaps	%ymm0, %ymm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovaps	%ymm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovaps	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovd	%eax, %xmm2
+# CHECK-NEXT:  1      5     1.00    *               	vmovd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovd	%xmm0, %ecx
+# CHECK-NEXT:  1      1     1.00           *        	vmovd	%xmm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vmovddup	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vmovddup	(%rax), %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vmovddup	%ymm0, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vmovddup	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovdqa	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovdqa	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovdqa	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovdqa	%ymm0, %ymm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovdqa	%ymm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovdqa	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovdqu	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovdqu	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovdqu	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovdqu	%ymm0, %ymm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovdqu	%ymm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovdqu	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovhlps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovlhps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovhpd	%xmm0, (%rax)
+# CHECK-NEXT:  1      6     1.00    *               	vmovhpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovhps	%xmm0, (%rax)
+# CHECK-NEXT:  1      6     1.00    *               	vmovhps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovlpd	%xmm0, (%rax)
+# CHECK-NEXT:  1      6     1.00    *               	vmovlpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovlps	%xmm0, (%rax)
+# CHECK-NEXT:  1      6     1.00    *               	vmovlps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vmovmskpd	%xmm0, %ecx
+# CHECK-NEXT:  1      3     1.00                    	vmovmskpd	%ymm0, %ecx
+# CHECK-NEXT:  1      3     1.00                    	vmovmskps	%xmm0, %ecx
+# CHECK-NEXT:  1      3     1.00                    	vmovmskps	%ymm0, %ecx
+# CHECK-NEXT:  1      2     1.00           *        	vmovntdq	%xmm0, (%rax)
+# CHECK-NEXT:  1      3     2.00           *        	vmovntdq	%ymm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovntdqa	(%rax), %xmm2
+# CHECK-NEXT:  1      5     1.00    *               	vmovntdqa	(%rax), %ymm2
+# CHECK-NEXT:  1      3     1.00           *        	vmovntpd	%xmm0, (%rax)
+# CHECK-NEXT:  1      3     2.00           *        	vmovntpd	%ymm0, (%rax)
+# CHECK-NEXT:  1      3     1.00           *        	vmovntps	%xmm0, (%rax)
+# CHECK-NEXT:  1      3     2.00           *        	vmovntps	%ymm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vmovq	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovq	%rax, %xmm2
+# CHECK-NEXT:  1      5     1.00    *               	vmovq	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovq	%xmm0, %rcx
+# CHECK-NEXT:  1      1     1.00           *        	vmovq	%xmm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vmovsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovsd	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovsd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovshdup	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vmovshdup	(%rax), %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vmovshdup	%ymm0, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vmovshdup	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovsldup	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vmovsldup	(%rax), %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vmovsldup	%ymm0, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vmovsldup	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovss	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovss	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovupd	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovupd	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovupd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovupd	%ymm0, %ymm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovupd	%ymm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovupd	(%rax), %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vmovups	%xmm0, %xmm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovups	%xmm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovups	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vmovups	%ymm0, %ymm2
+# CHECK-NEXT:  1      1     1.00           *        	vmovups	%ymm0, (%rax)
+# CHECK-NEXT:  1      5     1.00    *               	vmovups	(%rax), %ymm2
+# CHECK-NEXT:  1      3     2.00                    	vmpsadbw	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     2.00    *               	vmpsadbw	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      4     2.00                    	vmulpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      9     2.00    *               	vmulpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      4     4.00                    	vmulpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      9     4.00    *               	vmulpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vmulps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vmulps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      2     2.00                    	vmulps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      7     2.00    *               	vmulps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      4     2.00                    	vmulsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      9     2.00    *               	vmulsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vmulss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vmulss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vorpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vorpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vorpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vorpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vorps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vorps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vorps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vorps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vpabsb	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpabsb	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpabsd	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpabsd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpabsw	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpabsw	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpackssdw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpackssdw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpacksswb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpacksswb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpackusdw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpackusdw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpackuswb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpackuswb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddsb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddsb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddusb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddusb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddusw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddusw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpaddw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpaddw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpalignr	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpalignr	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpand	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpand	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpandn	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpandn	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpavgb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpavgb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpavgw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpavgw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  3      2     2.00                    	vpblendvb	%xmm3, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      7     2.00    *               	vpblendvb	%xmm3, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpblendw	$11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpblendw	$11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpclmulqdq	$11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpclmulqdq	$11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpeqb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpeqb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpeqd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpeqd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpeqq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpeqq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpeqw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpeqw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpgtb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpgtb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpgtd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpgtd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpgtq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpgtq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpcmpgtw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpcmpgtw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vperm2f128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  1      6     1.00    *               	vperm2f128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vpermilpd	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpermilpd	$1, (%rax), %xmm2
+# CHECK-NEXT:  3      2     2.00                    	vpermilpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      7     2.00    *               	vpermilpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vpermilpd	$1, %ymm0, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vpermilpd	$1, (%rax), %ymm2
+# CHECK-NEXT:  6      3     3.00                    	vpermilpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  6      8     3.00    *               	vpermilpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vpermilps	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpermilps	$1, (%rax), %xmm2
+# CHECK-NEXT:  3      2     2.00                    	vpermilps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      7     2.00    *               	vpermilps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vpermilps	$1, %ymm0, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vpermilps	$1, (%rax), %ymm2
+# CHECK-NEXT:  6      3     3.00                    	vpermilps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  6      8     3.00    *               	vpermilps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vpextrb	$1, %xmm0, %ecx
+# CHECK-NEXT:  2      6     1.00           *        	vpextrb	$1, %xmm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vpextrd	$1, %xmm0, %ecx
+# CHECK-NEXT:  2      6     1.00           *        	vpextrd	$1, %xmm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vpextrq	$1, %xmm0, %rcx
+# CHECK-NEXT:  2      6     1.00           *        	vpextrq	$1, %xmm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vpextrw	$1, %xmm0, %ecx
+# CHECK-NEXT:  2      6     1.00           *        	vpextrw	$1, %xmm0, (%rax)
+# CHECK-NEXT:  1      1     0.50                    	vphaddd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vphaddd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vphaddsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vphaddsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vphaddw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vphaddw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vphminposuw	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vphminposuw	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vphsubd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vphsubd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vphsubsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vphsubsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vphsubw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vphsubw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpinsrb	$1, %eax, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpinsrb	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpinsrd	$1, %eax, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpinsrd	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpinsrq	$1, %rax, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpinsrq	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpinsrw	$1, %eax, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpinsrw	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmaddubsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmaddubsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmaddwd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmaddwd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmaxsb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmaxsb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmaxsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmaxsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmaxsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmaxsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmaxub	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmaxub	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmaxud	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmaxud	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmaxuw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmaxuw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpminsb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpminsb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpminsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpminsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpminsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpminsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpminub	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpminub	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpminud	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpminud	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpminuw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpminuw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vpmovmskb	%xmm0, %ecx
+# CHECK-NEXT:  1      1     0.50                    	vpmovsxbd	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovsxbd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovsxbq	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovsxbq	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovsxbw	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovsxbw	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovsxdq	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovsxdq	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovsxwd	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovsxwd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovsxwq	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovsxwq	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovzxbd	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovzxbd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovzxbq	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovzxbq	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovzxbw	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovzxbw	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovzxdq	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovzxdq	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovzxwd	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovzxwd	(%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpmovzxwq	%xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpmovzxwq	(%rax), %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmuldq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmuldq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmulhrsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmulhrsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmulhuw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmulhuw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmulhw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmulhw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmulld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmulld	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmullw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmullw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vpmuludq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vpmuludq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpor	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpor	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsadbw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsadbw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  3      2     2.00                    	vpshufb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  3      7     2.00    *               	vpshufb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpshufd	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpshufd	$1, (%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpshufhw	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpshufhw	$1, (%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpshuflw	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpshuflw	$1, (%rax), %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsignb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsignb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsignd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsignd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsignw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsignw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpslld	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpslld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpslld	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpslldq	$1, %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsllq	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsllq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsllq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsllw	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsllw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsllw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrad	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrad	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsrad	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsraw	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsraw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsraw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrld	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrld	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsrld	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrldq	$1, %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrlq	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrlq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsrlq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrlw	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsrlw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsrlw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubsb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubsb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubsw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubsw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubusb	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubusb	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubusw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubusw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpsubw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpsubw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vptest	%xmm0, %xmm1
+# CHECK-NEXT:  1      8     1.00    *               	vptest	(%rax), %xmm1
+# CHECK-NEXT:  3      4     2.00                    	vptest	%ymm0, %ymm1
+# CHECK-NEXT:  3      9     2.00    *               	vptest	(%rax), %ymm1
+# CHECK-NEXT:  1      1     0.50                    	vpunpckhbw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpckhbw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpunpckhdq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpckhdq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpunpckhqdq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpckhqdq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpunpckhwd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpckhwd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpunpcklbw	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpcklbw	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpunpckldq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpckldq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpunpcklqdq	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpcklqdq	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpunpcklwd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpunpcklwd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vpxor	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vpxor	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vrcpps	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vrcpps	(%rax), %xmm2
+# CHECK-NEXT:  2      2     2.00                    	vrcpps	%ymm0, %ymm2
+# CHECK-NEXT:  2      7     2.00    *               	vrcpps	(%rax), %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vrcpss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vrcpss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vroundpd	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vroundpd	$1, (%rax), %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vroundpd	$1, %ymm0, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vroundpd	$1, (%rax), %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vroundps	$1, %xmm0, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vroundps	$1, (%rax), %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vroundps	$1, %ymm0, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vroundps	$1, (%rax), %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vroundsd	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vroundsd	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vroundss	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vroundss	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      2     1.00                    	vrsqrtps	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vrsqrtps	(%rax), %xmm2
+# CHECK-NEXT:  2      2     2.00                    	vrsqrtps	%ymm0, %ymm2
+# CHECK-NEXT:  2      7     2.00    *               	vrsqrtps	(%rax), %ymm2
+# CHECK-NEXT:  1      2     1.00                    	vrsqrtss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      7     1.00    *               	vrsqrtss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     0.50                    	vshufpd	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vshufpd	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vshufpd	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vshufpd	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vshufps	$1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vshufps	$1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vshufps	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vshufps	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      27    27.00                   	vsqrtpd	%xmm0, %xmm2
+# CHECK-NEXT:  1      32    27.00   *               	vsqrtpd	(%rax), %xmm2
+# CHECK-NEXT:  1      54    54.00                   	vsqrtpd	%ymm0, %ymm2
+# CHECK-NEXT:  1      59    54.00   *               	vsqrtpd	(%rax), %ymm2
+# CHECK-NEXT:  1      21    21.00                   	vsqrtps	%xmm0, %xmm2
+# CHECK-NEXT:  1      26    21.00   *               	vsqrtps	(%rax), %xmm2
+# CHECK-NEXT:  2      42    42.00                   	vsqrtps	%ymm0, %ymm2
+# CHECK-NEXT:  2      47    42.00   *               	vsqrtps	(%rax), %ymm2
+# CHECK-NEXT:  1      27    27.00                   	vsqrtsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      32    27.00   *               	vsqrtsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      21    21.00                   	vsqrtss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      26    21.00   *               	vsqrtss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      1     1.00    *      *      * 	vstmxcsr	(%rax)
+# CHECK-NEXT:  1      3     1.00                    	vsubpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vsubpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vsubpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vsubpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vsubps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vsubps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      3     2.00                    	vsubps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      8     2.00    *               	vsubps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      3     1.00                    	vsubsd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vsubsd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vsubss	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      8     1.00    *               	vsubss	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  1      3     1.00                    	vtestpd	%xmm0, %xmm1
+# CHECK-NEXT:  1      8     1.00    *               	vtestpd	(%rax), %xmm1
+# CHECK-NEXT:  3      4     2.00                    	vtestpd	%ymm0, %ymm1
+# CHECK-NEXT:  3      9     2.00    *               	vtestpd	(%rax), %ymm1
+# CHECK-NEXT:  1      3     1.00                    	vtestps	%xmm0, %xmm1
+# CHECK-NEXT:  1      8     1.00    *               	vtestps	(%rax), %xmm1
+# CHECK-NEXT:  3      4     2.00                    	vtestps	%ymm0, %ymm1
+# CHECK-NEXT:  3      9     2.00    *               	vtestps	(%rax), %ymm1
+# CHECK-NEXT:  1      3     1.00                    	vucomisd	%xmm0, %xmm1
+# CHECK-NEXT:  1      8     1.00    *               	vucomisd	(%rax), %xmm1
+# CHECK-NEXT:  1      3     1.00                    	vucomiss	%xmm0, %xmm1
+# CHECK-NEXT:  1      8     1.00    *               	vucomiss	(%rax), %xmm1
+# CHECK-NEXT:  1      1     0.50                    	vunpckhpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vunpckhpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vunpckhpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vunpckhpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vunpckhps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vunpckhps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vunpckhps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vunpckhps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vunpcklpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vunpcklpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vunpcklpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vunpcklpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vunpcklps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vunpcklps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vunpcklps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vunpcklps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vxorpd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vxorpd	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vxorpd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vxorpd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  1      1     0.50                    	vxorps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      6     1.00    *               	vxorps	(%rax), %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                    	vxorps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      6     2.00    *               	vxorps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  73     90     -      *      *      * 	vzeroall
+# CHECK-NEXT:  37     46     -      *      *      * 	vzeroupper
+
+
 # CHECK:      Resources:
 # CHECK-NEXT: [0] - JALU0
 # CHECK-NEXT: [1] - JALU1




More information about the llvm-commits mailing list